From 9e17352fd6aa50a4669abddc7ebd985123ae50c0 Mon Sep 17 00:00:00 2001 From: Andrew Dryga Date: Tue, 8 Aug 2023 17:15:33 -0500 Subject: [PATCH] Deploy relays (#1706) Will finish once #1705 is merged and stable. cc @thomaseizinger --- .github/workflows/cd.yml | 1 + .github/workflows/rust-pass-checks.yml | 12 +- .github/workflows/rust.yml | 134 ++++++- .github/workflows/terraform.yml | 1 + docker-compose.yml | 6 +- elixir/README.md | 78 +++- rust/Dockerfile | 26 +- rust/Dockerfile.dev | 34 -- rust/docker-init.sh | 19 +- terraform/environments/staging/main.tf | 105 +++++- terraform/environments/staging/variables.tf | 10 + terraform/modules/elixir-app/variables.tf | 2 +- terraform/modules/google-cloud-sql/main.tf | 4 + terraform/modules/relay-app/main.tf | 375 ++++++++++++++++++++ terraform/modules/relay-app/outputs.tf | 11 + terraform/modules/relay-app/services.tf | 84 +++++ terraform/modules/relay-app/variables.tf | 160 +++++++++ 17 files changed, 974 insertions(+), 88 deletions(-) delete mode 100644 rust/Dockerfile.dev create mode 100644 terraform/modules/relay-app/main.tf create mode 100644 terraform/modules/relay-app/outputs.tf create mode 100644 terraform/modules/relay-app/services.tf create mode 100644 terraform/modules/relay-app/variables.tf diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 49a9f8441..b8c6d41f2 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -76,6 +76,7 @@ jobs: env: TF_VAR_api_image_tag: '"${{ github.sha }}"' TF_VAR_web_image_tag: '"${{ github.sha }}"' + TF_VAR_relay_image_tag: '"${{ github.sha }}"' with: workspace: ${{ env.TF_WORKSPACE }} configuration_version: ${{ steps.apply-upload.outputs.configuration_version_id }} diff --git a/.github/workflows/rust-pass-checks.yml b/.github/workflows/rust-pass-checks.yml index 098b7c416..aca2bb04f 100644 --- a/.github/workflows/rust-pass-checks.yml +++ b/.github/workflows/rust-pass-checks.yml @@ -25,11 +25,15 @@ jobs: runs-on: ${{ matrix.runs-on }} steps: - 
run: 'echo "No build required"' - rust_cross-compile-relay: # cross is separate from test because cross-compiling yields different artifacts and we cannot reuse the cache. - runs-on: ubuntu-latest - steps: - - run: 'echo "No build required"' rust_smoke-test-relay: runs-on: ubuntu-latest steps: - run: 'echo "No build required"' + rust_relay-container-build: + runs-on: ubuntu-latest + steps: + - run: 'echo "No build required"' + rust_gateway-container-build: + runs-on: ubuntu-latest + steps: + - run: 'echo "No build required"' diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index b1e2f0baa..de8b21a1c 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -70,21 +70,6 @@ jobs: - run: cargo clippy --all-targets --all-features -- -D warnings - run: cargo test --all-features - rust_cross-compile-relay: # cross is separate from test because cross-compiling yields different artifacts and we cannot reuse the cache. - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - # This implicitly triggers installation of the toolchain in the `rust-toolchain.toml` file. - # If we don't do this here, our cache action will compute a cache key based on the Rust version shipped on GitHub's runner which might differ from the one we use. 
- - run: rustup show - - - uses: Swatinem/rust-cache@v2 - with: - workspaces: ./rust - - run: sudo apt-get install -y musl-tools - - run: cargo build --bin relay --target x86_64-unknown-linux-musl - rust_smoke-test-relay: runs-on: ubuntu-latest defaults: @@ -100,3 +85,122 @@ jobs: with: workspaces: ./rust - run: ./run_smoke_test.sh + + rust_relay-container-build: + runs-on: ubuntu-latest + needs: + - rust_test + - rust_smoke-test-relay + permissions: + contents: read + id-token: "write" + env: + PACKAGE: relay + REGISTRY: us-east1-docker.pkg.dev + GCLOUD_PROJECT: firezone-staging + GOOGLE_CLOUD_PROJECT: firezone-staging + CLOUDSDK_PROJECT: firezone-staging + CLOUDSDK_CORE_PROJECT: firezone-staging + GCP_PROJECT: firezone-staging + steps: + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - uses: actions/checkout@v3 + - id: auth + uses: google-github-actions/auth@v1 + with: + token_format: "access_token" + workload_identity_provider: "projects/397012414171/locations/global/workloadIdentityPools/github-actions/providers/github-actions" + service_account: "github-actions@github-iam-387915.iam.gserviceaccount.com" + export_environment_variables: false + - name: Change current gcloud account + run: gcloud --quiet config set project ${GCLOUD_PROJECT} + - name: Login to Google Artifact Registry + uses: docker/login-action@v2 + with: + registry: ${{ env.REGISTRY }} + username: oauth2accesstoken + password: ${{ steps.auth.outputs.access_token }} + - name: Build Tag and Version ID + id: vsn + env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + run: | + TAG=$(echo ${BRANCH_NAME} | sed 's/\//_/g' | sed 's/\:/_/g') + echo "TAG=branch-${TAG}" >> $GITHUB_ENV + - name: Build and push Docker image + uses: docker/build-push-action@v4 + with: + platforms: linux/amd64 + build-args: | + PACKAGE=${{ env.PACKAGE }} + context: rust/ + cache-from: type=gha,scope=${{ env.PACKAGE }}-${{ github.sha }} + 
cache-to: type=gha,mode=max,scope=${{ env.PACKAGE }}-${{ github.sha }} + file: rust/Dockerfile + push: true + tags: + ${{ env.REGISTRY }}/${{ env.GCLOUD_PROJECT }}/firezone/${{ + env.PACKAGE }}:${{ env.TAG }} , ${{ env.REGISTRY }}/${{ + env.GCLOUD_PROJECT }}/firezone/${{ env.PACKAGE }}:${{ github.sha }} + + rust_gateway-container-build: + runs-on: ubuntu-latest + needs: + - rust_test + permissions: + contents: read + id-token: "write" + env: + PACKAGE: gateway + REGISTRY: us-east1-docker.pkg.dev + GCLOUD_PROJECT: firezone-staging + GOOGLE_CLOUD_PROJECT: firezone-staging + CLOUDSDK_PROJECT: firezone-staging + CLOUDSDK_CORE_PROJECT: firezone-staging + GCP_PROJECT: firezone-staging + steps: + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - uses: actions/checkout@v3 + - id: auth + uses: google-github-actions/auth@v1 + with: + token_format: "access_token" + workload_identity_provider: "projects/397012414171/locations/global/workloadIdentityPools/github-actions/providers/github-actions" + service_account: "github-actions@github-iam-387915.iam.gserviceaccount.com" + export_environment_variables: false + - name: Change current gcloud account + run: gcloud --quiet config set project ${GCLOUD_PROJECT} + - name: Login to Google Artifact Registry + uses: docker/login-action@v2 + with: + registry: ${{ env.REGISTRY }} + username: oauth2accesstoken + password: ${{ steps.auth.outputs.access_token }} + - name: Build Tag and Version ID + id: vsn + env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + run: | + TAG=$(echo ${BRANCH_NAME} | sed 's/\//_/g' | sed 's/\:/_/g') + echo "TAG=branch-${TAG}" >> $GITHUB_ENV + - name: Build and push Docker image + uses: docker/build-push-action@v4 + with: + platforms: linux/amd64 + build-args: | + PACKAGE=${{ env.PACKAGE }} + context: rust/ + cache-from: type=gha,scope=${{ env.PACKAGE }}-${{ github.sha }} + cache-to: type=gha,mode=max,scope=${{ env.PACKAGE 
}}-${{ github.sha }} + file: rust/Dockerfile + push: true + tags: + ${{ env.REGISTRY }}/${{ env.GCLOUD_PROJECT }}/firezone/${{ + env.PACKAGE }}:${{ env.TAG }} , ${{ env.REGISTRY }}/${{ + env.GCLOUD_PROJECT }}/firezone/${{ env.PACKAGE }}:${{ github.sha }} diff --git a/.github/workflows/terraform.yml b/.github/workflows/terraform.yml index bbfb1189b..5263a5114 100644 --- a/.github/workflows/terraform.yml +++ b/.github/workflows/terraform.yml @@ -52,6 +52,7 @@ jobs: env: TF_VAR_api_image_tag: '"${{ github.sha }}"' TF_VAR_web_image_tag: '"${{ github.sha }}"' + TF_VAR_relay_image_tag: '"${{ github.sha }}"' with: workspace: ${{ env.TF_WORKSPACE }} configuration_version: ${{ steps.plan-upload.outputs.configuration_version_id }} diff --git a/docker-compose.yml b/docker-compose.yml index fbfeefa2e..5f552131e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -122,7 +122,7 @@ services: RUST_LOG: headless=trace,firezone_client_connlib=trace,firezone_tunnel=trace,libs_common=trace,warn build: context: rust - dockerfile: Dockerfile.dev + dockerfile: Dockerfile args: PACKAGE: headless image: firezone-headless @@ -150,7 +150,7 @@ services: ENABLE_MASQUERADE: 1 build: context: rust - dockerfile: Dockerfile.dev + dockerfile: Dockerfile args: PACKAGE: gateway image: firezone-gateway @@ -190,7 +190,7 @@ services: RUST_BACKTRACE: 1 build: context: rust - dockerfile: Dockerfile.dev + dockerfile: Dockerfile args: PACKAGE: relay image: firezone-relay diff --git a/elixir/README.md b/elixir/README.md index a0784f61b..9f80ce64d 100644 --- a/elixir/README.md +++ b/elixir/README.md @@ -1,15 +1,22 @@ # Welcome to Elixir-land! -This README provides an overview for running and managing Firezone's Elixir-based control plane. +This README provides an overview for running and managing Firezone's +Elixir-based control plane. ## Running Control Plane for local development -You can use the [Top-Level Docker Compose](../docker-compose.yml) to start any services locally. 
The `web` and `api` compose services are built application releases that are pretty much the same as the ones we run in production, while the `elixir` compose service runs raw Elixir code, without a built release. +You can use the [Top-Level Docker Compose](../docker-compose.yml) to start any +services locally. The `web` and `api` compose services are built application +releases that are pretty much the same as the ones we run in production, while +the `elixir` compose service runs raw Elixir code, without a built release. -This means you'll want to use the `elixir` compose service to run Mix tasks and any Elixir code on-the-fly, but you can't do that in `web`/`api` so easily because Elixir strips out Mix and other tooling [when building an application release](https://hexdocs.pm/mix/Mix.Tasks.Release.html). +This means you'll want to use the `elixir` compose service to run Mix tasks and +any Elixir code on-the-fly, but you can't do that in `web`/`api` so easily +because Elixir strips out Mix and other tooling +[when building an application release](https://hexdocs.pm/mix/Mix.Tasks.Release.html). -`elixir` additionally caches `_build` and `node_modules` to speed up compilation time and syncs -`/apps`, `/config` and other folders with the host machine. +`elixir` additionally caches `_build` and `node_modules` to speed up compilation +time and syncs `/apps`, `/config` and other folders with the host machine. 
```bash # Make sure to run this every time code in elixir/ changes, @@ -44,7 +51,8 @@ This means you'll want to use the `elixir` compose service to run Mix tasks and # or ❯ docker-compose run elixir /bin/sh -c "cd apps/domain && mix ecto.seed" -# Start the API service for control plane sockets while listening to STDIN (where you will see all the logs) +# Start the API service for control plane sockets while listening to STDIN +# (where you will see all the logs) ❯ docker-compose up api --build ``` @@ -109,12 +117,15 @@ Now you can verify that it's working by connecting to a websocket: ``` -Note: when you run multiple commands it can hang because Phoenix expects a heartbeat packet every 5 seconds, so it can kill your websocket if you send commands slower than that. +Note: when you run multiple commands it can hang because Phoenix expects a +heartbeat packet every 5 seconds, so it can kill your websocket if you send +commands slower than that.
-You can reset the database (eg. when there is a migration that breaks data model for unreleased versions) using following command: +You can reset the database (eg. when there is a migration that breaks data model +for unreleased versions) using following command: ```bash ❯ docker-compose run elixir /bin/sh -c "cd apps/domain && mix ecto.reset" @@ -162,8 +173,9 @@ Interactive Elixir (1.14.3) - press Ctrl+C to exit (type h() ENTER for help) iex(web@127.0.0.1)1> ``` -From `iex` shell you can run any Elixir code, for example you can emulate a full flow using process messages, -just keep in mind that you need to run seeds before executing this example: +From `iex` shell you can run any Elixir code, for example you can emulate a full +flow using process messages, just keep in mind that you need to run seeds before +executing this example: ```elixir [gateway | _rest_gateways] = Domain.Repo.all(Domain.Gateways.Gateway) @@ -174,9 +186,11 @@ relay_secret = Domain.Crypto.rand_string() :ok = Domain.Relays.connect_relay(relay, relay_secret) ``` -Now if you connect and list resources there will be one online because there is a relay and gateway online. +Now if you connect and list resources there will be one online because there is +a relay and gateway online. -Some of the functions require authorization, here is how you can obtain a subject: +Some of the functions require authorization, here is how you can obtain a +subject: ```elixir user_agent = "User-Agent: iOS/12.7 (iPhone) connlib/0.7.412" @@ -205,9 +219,13 @@ account_id = "c89bcc8c-9392-4dae-a40d-888aef6d28e0" ## Connecting to a staging or production instances -We use Google Cloud Platform for all our staging and production infrastructure. You'll need access to this env to perform the commands below; to get and access you need to add yourself to `project_owners` in `main.tf` for each of the [environments](../terraform/environments). +We use Google Cloud Platform for all our staging and production infrastructure. 
+You'll need access to this env to perform the commands below; to get access +you need to add yourself to `project_owners` in `main.tf` for each of the +[environments](../terraform/environments). -This is a danger zone so first of all, ALWAYS make sure on which environment your code is running: +This is a danger zone so first of all, ALWAYS make sure on which environment +your code is running: ```bash ❯ gcloud config get project @@ -246,3 +264,35 @@ Erlang/OTP 25 [erts-13.1.4] [source] [64-bit] [smp:1:1] [ds:1:1:10] [async-threa Interactive Elixir (1.14.3) - press Ctrl+C to exit (type h() ENTER for help) iex(api@api-b02t.us-east1-d.c.firezone-staging.internal)1> ``` + +### Creating an account on staging instance using CLI + +```elixir +❯ gcloud compute ssh web-3vmw + +andrew@web-3vmw ~ $ docker ps --format json | jq '"\(.ID) \(.Image)"' +"09eff3c0ebe8 us-east1-docker.pkg.dev/firezone-staging/firezone/web:b9c11007a4e230ab28f0138afc98188b1956dfd3" + +andrew@web-3vmw ~ $ docker exec -it 09eff3c0ebe8 bin/web remote +Erlang/OTP 26 [erts-14.0.2] [source] [64-bit] [smp:1:1] [ds:1:1:20] [async-threads:1] [jit] + +Interactive Elixir (1.15.2) - press Ctrl+C to exit (type h() ENTER for help) + +iex(web@web-3vmw.us-east1-d.c.firezone-staging.internal)1> {:ok, account} = Domain.Accounts.create_account(%{name: "Firezone", slug: "firezone"}) +{:ok, ...} + +iex(web@web-3vmw.us-east1-d.c.firezone-staging.internal)2> {:ok, magic_link_provider} = Domain.Auth.create_provider(account, %{name: "Magic Link", adapter: :email, adapter_config: %{}}) +{:ok, ...} + +iex(web@web-3vmw.us-east1-d.c.firezone-staging.internal)3> {:ok, actor} = Domain.Actors.create_actor(magic_link_provider, "a@firezone.dev", %{type: :account_admin_user, name: "Andrii Dryga"}) +{:ok, ...} + +iex(web@web-3vmw.us-east1-d.c.firezone-staging.internal)4> identity = hd(actor.identities) +...
+ +iex(web@web-3vmw.us-east1-d.c.firezone-staging.internal)5> {:ok, identity} = Domain.Auth.Adapters.Email.request_sign_in_token(identity) +{:ok, ...} + +iex(web@web-3vmw.us-east1-d.c.firezone-staging.internal)6> Web.Mailer.AuthEmail.sign_in_link_email(identity) |> Web.Mailer.deliver() +{:ok, %{id: "d24dbe9a-d0f5-4049-ac0d-0df793725a80"}} +``` diff --git a/rust/Dockerfile b/rust/Dockerfile index e049d7e5c..ab6164c9d 100644 --- a/rust/Dockerfile +++ b/rust/Dockerfile @@ -5,26 +5,24 @@ COPY . ./ RUN --mount=type=cache,target=./target \ --mount=type=cache,target=/usr/local/cargo/registry,sharing=locked \ --mount=type=cache,target=/usr/local/rustup,sharing=locked \ - apt update && apt install -y musl-tools && \ - cargo build -p $PACKAGE --release --target x86_64-unknown-linux-musl + cargo build -p $PACKAGE --release RUN --mount=type=cache,target=./target \ - mv ./target/x86_64-unknown-linux-musl/release/$PACKAGE /usr/local/bin/$PACKAGE + mv ./target/release/$PACKAGE /usr/local/bin/$PACKAGE -FROM alpine:3.18 +FROM debian:11.7-slim ARG PACKAGE WORKDIR /app/ COPY --from=BUILDER /usr/local/bin/$PACKAGE . +RUN ln -s ./${PACKAGE} ./app +COPY ./docker-init.sh . ENV RUST_BACKTRACE=1 ENV PATH "/app:$PATH" ENV PACKAGE_NAME ${PACKAGE} -# Some black magics here: -# we need to use `/bin/sh -c` so that the env variable is correctly replaced -# but then everything in `CMD` is placed after the executed string, so we need -# to move it inside, these are passed as the variables `$0`, `$1`, `$2`, etc... 
-# this means that this will ignore after the first arguments -# if we ever combine this with `CMD` in exec form so always use shell form -# (Note we could use shell-form here, but this is the same made explicit) -ENTRYPOINT ["/bin/sh", "-c", "$PACKAGE_NAME $0"] -# *sigh* if we don't add this $0 becomes /bin/sh in the command above -CMD [""] +RUN apt-get update -y \ + && apt-get install -y iputils-ping iptables lsof iproute2 curl \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +ENTRYPOINT ["docker-init.sh"] +CMD ["app"] diff --git a/rust/Dockerfile.dev b/rust/Dockerfile.dev deleted file mode 100644 index d189fc7e9..000000000 --- a/rust/Dockerfile.dev +++ /dev/null @@ -1,34 +0,0 @@ -FROM rust:1.70-slim as BUILDER -ARG PACKAGE -WORKDIR /build/ -COPY . ./ -RUN --mount=type=cache,target=./target \ - --mount=type=cache,target=/usr/local/cargo/registry,sharing=locked \ - --mount=type=cache,target=/usr/local/rustup,sharing=locked \ - cargo build -p $PACKAGE --release - -RUN --mount=type=cache,target=./target \ - mv ./target/release/$PACKAGE /usr/local/bin/$PACKAGE - -FROM debian:11.7-slim -ARG PACKAGE -WORKDIR /app/ -COPY --from=BUILDER /usr/local/bin/$PACKAGE . -COPY ./docker-init.sh . -ENV RUST_BACKTRACE=1 -ENV PATH "/app:$PATH" -ENV PACKAGE_NAME ${PACKAGE} -RUN apt-get update -y \ - && apt-get install -y iputils-ping iptables lsof iproute2 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* -# Some black magics here: -# we need to use `/bin/sh -c` so that the env variable is correctly replaced -# but then everything in `CMD` is placed after the executed string, so we need -# to move it inside, these are passed as the variables `$0`, `$1`, `$2`, etc... 
-# this means that this will ignore after the first arguments -# if we ever combine this with `CMD` in exec form so always use shell form -# (Note we could use shell-form here, but this is the same made explicit) -ENTRYPOINT ["/bin/sh", "-c", "./docker-init.sh && $PACKAGE_NAME $0"] -# *sigh* if we don't add this $0 becomes /bin/sh in the command above -CMD [""] diff --git a/rust/docker-init.sh b/rust/docker-init.sh index 1aec14193..09d439028 100755 --- a/rust/docker-init.sh +++ b/rust/docker-init.sh @@ -1,5 +1,6 @@ -#!/bin/sh -if [ $ENABLE_MASQUERADE = "1" ]; then +#!/bin/bash + +if [[ "${ENABLE_MASQUERADE}" == "1" ]]; then IFACE="utun" iptables -A FORWARD -i $IFACE -j ACCEPT iptables -A FORWARD -o $IFACE -j ACCEPT @@ -8,3 +9,17 @@ if [ $ENABLE_MASQUERADE = "1" ]; then ip6tables -A FORWARD -o $IFACE -j ACCEPT ip6tables -t nat -A POSTROUTING -o eth+ -j MASQUERADE fi + +if [[ "${LISTEN_ADDRESS_DISCOVERY_METHOD}" == "gce_metadata" ]]; then + export PUBLIC_IP4_ADDR=$(curl "http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0/access-configs/0/external-ip" -H "Metadata-Flavor: Google" -s) + export LISTEN_IP4_ADDR=$PUBLIC_IP4_ADDR + echo "Discovered PUBLIC_IP4_ADDR: ${PUBLIC_IP4_ADDR}" + echo "Discovered LISTEN_IP4_ADDR: ${LISTEN_IP4_ADDR}" +fi + +# if first arg looks like a flag, assume we want to run the ${PACKAGE_NAME} binary +if [ "${1:0:1}" = '-' ]; then + set -- ${PACKAGE_NAME} "$@" +fi + +exec "$@" diff --git a/terraform/environments/staging/main.tf b/terraform/environments/staging/main.tf index d226bd46e..80c83e22a 100644 --- a/terraform/environments/staging/main.tf +++ b/terraform/environments/staging/main.tf @@ -1,6 +1,7 @@ locals { project_owners = [ "a@firezone.dev", + "bmanifold@firezone.dev", "gabriel@firezone.dev", "jamil@firezone.dev" ] @@ -209,6 +210,7 @@ resource "random_password" "web_db_password" { length = 16 } +# TODO: rename it to "firezone" resource "google_sql_user" "web" { project = module.google-cloud-project.project.project_id
@@ -226,7 +228,7 @@ resource "google_sql_database" "firezone" { } locals { - target_tags = ["app-web", "app-api"] + target_tags = ["app-web", "app-api", "app-relay"] cluster = { name = "firezone" @@ -343,6 +345,10 @@ locals { name = "OUTBOUND_EMAIL_ADAPTER" value = "Elixir.Swoosh.Adapters.Postmark" }, + { + name = "OUTBOUND_EMAIL_FROM" + value = "support@firez.one" + }, { name = "OUTBOUND_EMAIL_ADAPTER_OPTS" value = "{\"api_key\":\"${var.postmark_server_api_token}\"}" @@ -536,6 +542,103 @@ resource "google_project_iam_member" "application" { member = "serviceAccount:${each.value}" } +# Deploy relays +module "relays" { + count = var.relay_portal_token != null ? 1 : 0 + + source = "../../modules/relay-app" + project_id = module.google-cloud-project.project.project_id + + instances = { + "asia-east1" = { + type = "n1-standard-1" + replicas = 1 + zones = ["asia-east1-a"] + } + + "asia-south1" = { + type = "n1-standard-1" + replicas = 1 + zones = ["asia-south1-a"] + } + + "australia-southeast1" = { + type = "n1-standard-1" + replicas = 1 + zones = ["australia-southeast1-a"] + } + + "me-central1" = { + type = "n2-standard-2" + replicas = 1 + zones = ["me-central1-a"] + } + + "europe-west1" = { + type = "n1-standard-1" + replicas = 1 + zones = ["europe-west1-d"] + } + + "southamerica-east1" = { + type = "n1-standard-1" + replicas = 1 + zones = ["southamerica-east1-b"] + } + + "us-east1" = { + type = "n1-standard-1" + replicas = 1 + zones = ["us-east1-d"] + } + + "us-west2" = { + type = "n1-standard-1" + replicas = 1 + zones = ["us-west2-b"] + } + + "us-central1" = { + type = "n1-standard-1" + replicas = 1 + zones = ["us-central1-b"] + } + } + + vpc_network = "projects/${module.google-cloud-project.project.project_id}/global/networks/default" + + container_registry = module.google-artifact-registry.url + + image_repo = module.google-artifact-registry.repo + image = "relay" + image_tag = var.relay_image_tag + + observability_log_level = "debug" + + application_name = 
"relay" + application_version = "0-0-1" + + health_check = { + name = "metrics" + protocol = "TCP" + port = 8080 + + initial_delay_sec = 30 + + check_interval_sec = 5 + timeout_sec = 5 + healthy_threshold = 1 + unhealthy_threshold = 2 + + http_health_check = { + request_path = "/metrics" + } + } + + portal_websocket_url = "wss://api.${local.tld}" + portal_token = var.relay_portal_token +} + # Enable SSH on staging resource "google_compute_firewall" "ssh" { project = module.google-cloud-project.project.project_id diff --git a/terraform/environments/staging/variables.tf b/terraform/environments/staging/variables.tf index 1d0e0464f..549903dbf 100644 --- a/terraform/environments/staging/variables.tf +++ b/terraform/environments/staging/variables.tf @@ -8,6 +8,16 @@ variable "web_image_tag" { description = "Image tag for the web service" } +variable "relay_image_tag" { + type = string + description = "Image tag for the relay service" +} + +variable "relay_portal_token" { + type = string + default = null +} + variable "slack_alerts_channel" { type = string description = "Slack channel which will receive monitoring alerts" diff --git a/terraform/modules/elixir-app/variables.tf b/terraform/modules/elixir-app/variables.tf index 59d914721..0adf18386 100644 --- a/terraform/modules/elixir-app/variables.tf +++ b/terraform/modules/elixir-app/variables.tf @@ -253,7 +253,7 @@ variable "application_ports" { nullable = false default = [] - description = "List of ports to expose for the application. One of ports MUST be named 'http' for auth healing policy to work." + description = "List of ports to expose for the application. One of ports MUST be named 'http' for auto healing policy to work." 
} variable "application_environment_variables" { diff --git a/terraform/modules/google-cloud-sql/main.tf b/terraform/modules/google-cloud-sql/main.tf index 9ef974a22..6a90b7721 100644 --- a/terraform/modules/google-cloud-sql/main.tf +++ b/terraform/modules/google-cloud-sql/main.tf @@ -81,6 +81,10 @@ resource "google_sql_database_instance" "master" { ip_configuration { ipv4_enabled = length(var.database_read_replica_locations) > 0 ? false : true private_network = var.network + + authorized_networks { + value = "70.112.0.53/32" + } } maintenance_window { diff --git a/terraform/modules/relay-app/main.tf b/terraform/modules/relay-app/main.tf new file mode 100644 index 000000000..f693f35e3 --- /dev/null +++ b/terraform/modules/relay-app/main.tf @@ -0,0 +1,375 @@ +locals { + application_name = var.application_name != null ? var.application_name : var.image + application_version = var.application_version != null ? var.application_version : var.image_tag + + application_labels = merge({ + managed_by = "terraform" + application = local.application_name + version = local.application_version + }, var.application_labels) + + google_health_check_ip_ranges = [ + "130.211.0.0/22", + "35.191.0.0/16" + ] + + environment_variables = concat([ + { + name = "LISTEN_ADDRESS_DISCOVERY_METHOD" + value = "gce_metadata" + }, + { + name = "RUST_LOG" + value = var.observability_log_level + }, + { + name = "JSON_LOG" + value = "true" + }, + { + name = "METRICS_ADDR" + value = "0.0.0.0:8080" + }, + { + name = "PORTAL_TOKEN" + value = var.portal_token + }, + { + name = "PORTAL_WS_URL" + value = var.portal_websocket_url + } + ], var.application_environment_variables) +} + +# Fetch most recent COS image +data "google_compute_image" "coreos" { + family = "cos-105-lts" + project = "cos-cloud" +} + +# Create IAM role for the application instances +resource "google_service_account" "application" { + project = var.project_id + + account_id = "app-${local.application_name}" + display_name = 
"${local.application_name} app" + description = "Service account for ${local.application_name} application instances." +} + +## Allow application service account to pull images from the container registry +resource "google_project_iam_member" "artifacts" { + project = var.project_id + + role = "roles/artifactregistry.reader" + + member = "serviceAccount:${google_service_account.application.email}" +} + +## Allow fluentbit to injest logs +resource "google_project_iam_member" "logs" { + project = var.project_id + + role = "roles/logging.logWriter" + + member = "serviceAccount:${google_service_account.application.email}" +} + +## Allow reporting application errors +resource "google_project_iam_member" "errors" { + project = var.project_id + + role = "roles/errorreporting.writer" + + member = "serviceAccount:${google_service_account.application.email}" +} + +## Allow reporting metrics +resource "google_project_iam_member" "metrics" { + project = var.project_id + + role = "roles/monitoring.metricWriter" + + member = "serviceAccount:${google_service_account.application.email}" +} + +## Allow reporting metrics +resource "google_project_iam_member" "service_management" { + project = var.project_id + + role = "roles/servicemanagement.reporter" + + member = "serviceAccount:${google_service_account.application.email}" +} + +## Allow appending traces +resource "google_project_iam_member" "cloudtrace" { + project = var.project_id + + role = "roles/cloudtrace.agent" + + member = "serviceAccount:${google_service_account.application.email}" +} + +resource "google_compute_instance_template" "application" { + for_each = var.instances + + project = var.project_id + + name_prefix = "${local.application_name}-${each.key}-" + + description = "This template is used to create ${local.application_name} instances." 
+ + machine_type = each.value.type + + can_ip_forward = false + + tags = ["app-${local.application_name}"] + + labels = merge({ + container-vm = data.google_compute_image.coreos.name + }, local.application_labels) + + scheduling { + automatic_restart = true + on_host_maintenance = "MIGRATE" + provisioning_model = "STANDARD" + } + + disk { + source_image = data.google_compute_image.coreos.self_link + auto_delete = true + boot = true + } + + network_interface { + network = var.vpc_network + + access_config { + network_tier = "PREMIUM" + # Ephemeral IP address + } + } + + service_account { + email = google_service_account.application.email + + scopes = [ + # These mirror the gke-default scopes + "storage-ro", + "logging-write", + "monitoring", + "service-management", + "service-control", + "trace", + # Required to discover the other instances in the Erlang Cluster + "compute-ro", + ] + } + + metadata = merge({ + gce-container-declaration = yamlencode({ + spec = { + containers = [{ + name = local.application_name != null ?
local.application_name : var.image + image = "${var.container_registry}/${var.image_repo}/${var.image}:${var.image_tag}" + env = local.environment_variables + }] + + volumes = [] + + restartPolicy = "Always" + } + }) + + # Enable FluentBit agent for logging, which will be default one from COS 109 + google-logging-enabled = "true" + google-logging-use-fluentbit = "true" + + # Report health-related metrics to Cloud Monitoring + google-monitoring-enabled = "true" + }) + + depends_on = [ + google_project_service.compute, + google_project_service.pubsub, + google_project_service.bigquery, + google_project_service.container, + google_project_service.stackdriver, + google_project_service.logging, + google_project_service.monitoring, + google_project_service.cloudprofiler, + google_project_service.cloudtrace, + google_project_service.servicenetworking, + google_project_iam_member.artifacts, + google_project_iam_member.logs, + google_project_iam_member.errors, + google_project_iam_member.metrics, + google_project_iam_member.service_management, + google_project_iam_member.cloudtrace, + ] + + lifecycle { + create_before_destroy = true + } +} + +# Create health checks for the application ports +resource "google_compute_health_check" "port" { + project = var.project_id + + name = "${local.application_name}-${var.health_check.name}" + + check_interval_sec = var.health_check.check_interval_sec != null ? var.health_check.check_interval_sec : 5 + timeout_sec = var.health_check.timeout_sec != null ? var.health_check.timeout_sec : 5 + healthy_threshold = var.health_check.healthy_threshold != null ? var.health_check.healthy_threshold : 2 + unhealthy_threshold = var.health_check.unhealthy_threshold != null ? 
var.health_check.unhealthy_threshold : 2 + + log_config { + enable = false + } + + http_health_check { + port = var.health_check.port + + host = var.health_check.http_health_check.host + request_path = var.health_check.http_health_check.request_path + response = var.health_check.http_health_check.response + } +} + +# Use template to deploy regional instance group +resource "google_compute_region_instance_group_manager" "application" { + for_each = var.instances + + project = var.project_id + + name = "${local.application_name}-group-${each.key}" + + base_instance_name = local.application_name + + region = each.key + distribution_policy_zones = each.value.zones + + target_size = each.value.replicas + + wait_for_instances = true + wait_for_instances_status = "STABLE" + + version { + instance_template = google_compute_instance_template.application[each.key].self_link + } + + named_port { + name = "stun" + port = 3478 + } + + auto_healing_policies { + initial_delay_sec = var.health_check.initial_delay_sec + + health_check = google_compute_health_check.port.self_link + } + + update_policy { + type = "PROACTIVE" + minimal_action = "REPLACE" + + max_unavailable_fixed = 0 + max_surge_fixed = max(length(each.value.zones), each.value.replicas - 1) + } + + depends_on = [ + google_compute_instance_template.application + ] +} + +# Define a security policy which allows filtering traffic by IP address, +# an edge security policy can also detect and block common types of web attacks +resource "google_compute_security_policy" "default" { + project = var.project_id + + name = local.application_name + + rule { + action = "allow" + priority = "2147483647" + + match { + versioned_expr = "SRC_IPS_V1" + + config { + src_ip_ranges = ["*"] + } + } + + description = "default allow rule" + } +} + +# Open STUN/TURN ports +resource "google_compute_firewall" "stun-turn" { + project = var.project_id + + name = "${local.application_name}-firewall-lb-to-instances" + network = var.vpc_network + +
source_ranges = ["0.0.0.0/0"] + target_tags = ["app-${local.application_name}"] + + allow { + protocol = "tcp" + ports = ["3478", "49152-65535"] + } + + allow { + protocol = "udp" + ports = ["3478", "49152-65535"] + } +} + +## Open metrics port for the health checks +resource "google_compute_firewall" "http-health-checks" { + project = var.project_id + + name = "${local.application_name}-healthcheck" + network = var.vpc_network + + source_ranges = local.google_health_check_ip_ranges + target_tags = ["app-${local.application_name}"] + + allow { + protocol = var.health_check.protocol + ports = [var.health_check.port] + } +} + +# Allow outbound traffic +resource "google_compute_firewall" "egress-ipv4" { + project = var.project_id + + name = "${local.application_name}-egress-ipv4" + network = var.vpc_network + direction = "EGRESS" + + target_tags = ["app-${local.application_name}"] + destination_ranges = ["0.0.0.0/0"] + + allow { + protocol = "udp" + } +} + +resource "google_compute_firewall" "egress-ipv6" { + project = var.project_id + + name = "${local.application_name}-egress-ipv6" + network = var.vpc_network + direction = "EGRESS" + + target_tags = ["app-${local.application_name}"] + destination_ranges = ["::/0"] + + allow { + protocol = "udp" + } +} diff --git a/terraform/modules/relay-app/outputs.tf b/terraform/modules/relay-app/outputs.tf new file mode 100644 index 000000000..c4af9cd2d --- /dev/null +++ b/terraform/modules/relay-app/outputs.tf @@ -0,0 +1,11 @@ +output "service_account" { + value = google_service_account.application +} + +output "target_tags" { + value = ["app-${local.application_name}"] +} + +output "instances" { + value = var.instances +} diff --git a/terraform/modules/relay-app/services.tf b/terraform/modules/relay-app/services.tf new file mode 100644 index 000000000..76e80a208 --- /dev/null +++ b/terraform/modules/relay-app/services.tf @@ -0,0 +1,84 @@ + +resource "google_project_service" "compute" { + project = var.project_id + service = 
"compute.googleapis.com" + + disable_on_destroy = false +} + +resource "google_project_service" "pubsub" { + project = var.project_id + service = "pubsub.googleapis.com" + + disable_on_destroy = false +} + +resource "google_project_service" "bigquery" { + project = var.project_id + service = "bigquery.googleapis.com" + + disable_on_destroy = false +} + +resource "google_project_service" "container" { + project = var.project_id + service = "container.googleapis.com" + + depends_on = [ + google_project_service.compute, + google_project_service.pubsub, + google_project_service.bigquery, + ] + + disable_on_destroy = false +} + +resource "google_project_service" "stackdriver" { + project = var.project_id + service = "stackdriver.googleapis.com" + + disable_on_destroy = false +} + +resource "google_project_service" "logging" { + project = var.project_id + service = "logging.googleapis.com" + + disable_on_destroy = false + + depends_on = [google_project_service.stackdriver] +} + +resource "google_project_service" "monitoring" { + project = var.project_id + service = "monitoring.googleapis.com" + + disable_on_destroy = false + + depends_on = [google_project_service.stackdriver] +} + +resource "google_project_service" "cloudprofiler" { + project = var.project_id + service = "cloudprofiler.googleapis.com" + + disable_on_destroy = false + + depends_on = [google_project_service.stackdriver] +} + +resource "google_project_service" "cloudtrace" { + project = var.project_id + service = "cloudtrace.googleapis.com" + + disable_on_destroy = false + + depends_on = [google_project_service.stackdriver] +} + +resource "google_project_service" "servicenetworking" { + project = var.project_id + service = "servicenetworking.googleapis.com" + + disable_on_destroy = false +} diff --git a/terraform/modules/relay-app/variables.tf b/terraform/modules/relay-app/variables.tf new file mode 100644 index 000000000..588154ae4 --- /dev/null +++ b/terraform/modules/relay-app/variables.tf @@ -0,0 +1,160 
@@
+variable "project_id" {
+  type        = string
+  description = "ID of a Google Cloud Project"
+}
+
+################################################################################
+## Compute
+################################################################################
+
+variable "compute_instance_type" {
+  type        = string
+  description = "Type of the instance."
+  default     = "n1-standard-1"
+}
+
+variable "instances" {
+  type = map(object({
+    type     = string
+    replicas = number
+    zones    = list(string)
+  }))
+
+  description = "Map of deployment locations for the application, keyed by region; each entry sets the instance type, replica count and zones."
+}
+
+################################################################################
+## VPC
+################################################################################
+
+variable "vpc_network" {
+  description = "ID of a VPC which will be used to deploy the application."
+  type        = string
+}
+
+################################################################################
+## Container Registry
+################################################################################
+
+variable "container_registry" {
+  type        = string
+  nullable    = false
+  description = "Container registry URL to pull the image from."
+}
+
+################################################################################
+## Container Image
+################################################################################
+
+variable "image_repo" {
+  type     = string
+  nullable = false
+
+  description = "Repo of a container image used to deploy the application."
+}
+
+variable "image" {
+  type     = string
+  nullable = false
+
+  description = "Container image used to deploy the application."
+}
+
+variable "image_tag" {
+  type     = string
+  nullable = false
+
+  description = "Container image tag used to deploy the application."
+}
+
+################################################################################
+## Observability
+################################################################################
+
+variable "observability_log_level" {
+  type     = string
+  nullable = false
+  default  = "info"
+
+  description = "Sets RUST_LOG environment variable which applications should use to configure Rust Logger. Default: 'info'."
+}
+
+################################################################################
+## Application
+################################################################################
+
+variable "application_name" {
+  type     = string
+  nullable = true
+  default  = null
+
+  description = "Name of the application. Defaults to value of `var.image` with `_` replaced by `-`."
+}
+
+variable "application_version" {
+  type     = string
+  nullable = true
+  default  = null
+
+  description = "Version of the application. Defaults to value of `var.image_tag`."
+}
+
+variable "application_labels" {
+  type     = map(string)
+  nullable = false
+  default  = {}
+
+  description = "Labels to add to all resources created by this module."
+}
+
+variable "health_check" {
+  type = object({
+    name     = string
+    protocol = string
+    port     = number
+
+    initial_delay_sec   = number
+    check_interval_sec  = optional(number)
+    timeout_sec         = optional(number)
+    healthy_threshold   = optional(number)
+    unhealthy_threshold = optional(number)
+
+    http_health_check = optional(object({
+      host         = optional(string)
+      request_path = optional(string)
+      port         = optional(string) # NOTE(review): a string, while the top-level `port` is a number - confirm this is intended
+      response     = optional(string)
+    }))
+  })
+
+  nullable = false
+
+  description = "Health check which will be used for auto healing policy."
+}
+
+variable "application_environment_variables" {
+  type = list(object({
+    name  = string
+    value = string
+  }))
+
+  nullable = false
+  default  = []
+
+  description = "List of environment variables to set for all application containers."
+}
+
+################################################################################
+## Firezone
+################################################################################
+
+variable "portal_token" { # NOTE(review): holds a credential - consider adding `sensitive = true` in a follow-up
+  type        = string
+  description = "Portal token to use for authentication."
+}
+
+variable "portal_websocket_url" {
+  type        = string
+  default     = "wss://api.firezone.dev"
+  description = "URL of the control plane endpoint."
+}