chore(infra): Use Regional Instance Group in the GCP NAT example (#4183)

This example will work once our latest gateway is pushed to GitHub
Container Registry, but to test it for now a few overrides can be added
to the `main.tf` to use our Google Artifact Registry and local module
instead:
```diff
module "gateways" {
--  source = "github.com/firezone/firezone/terraform/modules/google-cloud/apps/gateway-region-instance-group"
++  source = "../../../modules/google-cloud/apps/gateway-region-instance-group"
...
++  container_registry = "us-east1-docker.pkg.dev"
++  image_repo         = "firezone-prod/firezone"
++  image              = "gateway"
}
```

---------

Signed-off-by: Jamil <jamilbk@users.noreply.github.com>
Signed-off-by: Andrew Dryga <andrew@dryga.com>
Co-authored-by: Jamil <jamilbk@users.noreply.github.com>
This commit is contained in:
Andrew Dryga
2024-03-19 08:44:14 -06:00
committed by GitHub
parent 24dc7be6bb
commit 09dbd70dc5
20 changed files with 287 additions and 223 deletions

View File

@@ -103,7 +103,7 @@ product documentation, organized as follows:
- [kotlin/](../kotlin/android): Android / ChromeOS clients.
- [website/](../website): Marketing website and product documentation.
- [terraform/](../terraform): Terraform files for various example deployments.
- [terraform/examples/gcp/nat_gateway](../terraform/examples/gcp/nat_gateway):
- [terraform/examples/google-cloud/nat_gateway](../terraform/examples/google-cloud/nat_gateway):
Example Terraform configurations for deploying a cluster of Firezone
gateways behind a NAT gateway on GCP with single egress IP.
- [terraform/modules/google-cloud/apps/gateway-region-instance-group](../terraform/modules/google-cloud/apps/gateway-region-instance-group):

View File

@@ -283,16 +283,10 @@ defmodule Web.Sites.Show do
end
def handle_info(
%Phoenix.Socket.Broadcast{topic: "presences:group_gateways:" <> _group_id} = event,
%Phoenix.Socket.Broadcast{topic: "presences:group_gateways:" <> _group_id},
socket
) do
rendered_gateway_ids = Enum.map(socket.assigns.gateways, & &1.id)
if presence_updates_any_id?(event, rendered_gateway_ids) do
{:noreply, reload_live_table!(socket, "gateways")}
else
{:noreply, socket}
end
{:noreply, reload_live_table!(socket, "gateways")}
end
def handle_event(event, params, socket) when event in ["paginate", "order_by", "filter"],

View File

@@ -4,6 +4,7 @@
**/terraform.tfstate.d
**/terraform.tfvars
out.plan
*.tfstate
# Don't ever commit these files to git
*.p12

View File

@@ -150,7 +150,7 @@ resource "google_compute_firewall" "egress-ipv4" {
destination_ranges = ["0.0.0.0/0"]
allow {
protocol = "udp"
protocol = "all"
}
}
@@ -165,7 +165,7 @@ resource "google_compute_firewall" "egress-ipv6" {
destination_ranges = ["::/0"]
allow {
protocol = "udp"
protocol = "all"
}
}

View File

@@ -27,25 +27,7 @@ module "gateways" {
observability_log_level = "debug"
application_name = "gateway"
application_version = replace(var.image_tag, ".", "-")
health_check = {
name = "health"
protocol = "TCP"
port = 8080
initial_delay_sec = 60
check_interval_sec = 15
timeout_sec = 10
healthy_threshold = 1
unhealthy_threshold = 3
http_health_check = {
request_path = "/healthz"
}
}
application_name = "gateway"
api_url = "wss://api.${local.tld}"
token = var.gateway_token
@@ -69,8 +51,6 @@ resource "google_compute_firewall" "gateways-metabase-access" {
}
}
# curl "http://metabase.c.firezone-prod.internal:3000/" -v
# Allow outbound traffic
resource "google_compute_firewall" "gateways-egress-ipv4" {
count = var.gateway_token != null ? 1 : 0
@@ -85,7 +65,7 @@ resource "google_compute_firewall" "gateways-egress-ipv4" {
destination_ranges = ["0.0.0.0/0"]
allow {
protocol = "udp"
protocol = "all"
}
}
@@ -102,7 +82,7 @@ resource "google_compute_firewall" "gateways-egress-ipv6" {
destination_ranges = ["::/0"]
allow {
protocol = "udp"
protocol = "all"
}
}

View File

@@ -356,7 +356,7 @@ module "domain" {
image = "domain"
image_tag = var.image_tag
scaling_horizontal_replicas = 2
scaling_horizontal_replicas = 1
observability_log_level = "debug"

View File

@@ -149,7 +149,7 @@ resource "google_compute_firewall" "egress-ipv4" {
destination_ranges = ["0.0.0.0/0"]
allow {
protocol = "udp"
protocol = "all"
}
}
@@ -164,7 +164,7 @@ resource "google_compute_firewall" "egress-ipv6" {
destination_ranges = ["::/0"]
allow {
protocol = "udp"
protocol = "all"
}
}

View File

@@ -10,7 +10,7 @@ instructions on how to deploy the example.
### Google Cloud Platform (GCP)
- [NAT Gateway](./gcp/nat_gateway): This example shows how to deploy one or more
Firezone Gateways in a single GCP VPC that is configured with a Cloud NAT for
egress. Read this if you're looking to deploy Firezone Gateways behind a
single, shared static IP address on GCP.
- [NAT Gateway](./google-cloud/nat_gateway): This example shows how to deploy
one or more Firezone Gateways in a single GCP VPC that is configured with a
Cloud NAT for egress. Read this if you're looking to deploy Firezone Gateways
behind a single, shared static IP address on GCP.

View File

@@ -1,112 +0,0 @@
terraform {
required_providers {
google = {
source = "hashicorp/google"
version = "5.19.0"
}
}
}
provider "google" {
project = var.project_id
region = var.region
zone = var.zone
}
resource "google_project_service" "compute-api" {
project = var.project_id
service = "compute.googleapis.com"
}
resource "google_service_account" "firezone" {
account_id = "firezone-gateway"
display_name = "Firezone Gateway Service Account"
}
resource "google_compute_network" "firezone" {
name = "firezone-gateway"
auto_create_subnetworks = true
depends_on = [google_project_service.compute-api]
}
resource "google_compute_router" "firezone" {
name = "firezone-gateway-router"
network = google_compute_network.firezone.id
}
resource "google_compute_router_nat" "firezone" {
name = "firezone-gateway-nat"
router = google_compute_router.firezone.name
nat_ip_allocate_option = "AUTO_ONLY"
source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES"
}
resource "google_compute_instance_template" "gateway" {
name = "gateway-template"
description = "Instance template for the Firezone Gateway"
instance_description = "Firezone Gateway"
machine_type = var.machine_type
tags = ["firezone-gateway"]
can_ip_forward = true
scheduling {
automatic_restart = true
on_host_maintenance = "MIGRATE"
}
disk {
source_image = "debian-cloud/debian-12"
auto_delete = true
boot = true
}
network_interface {
network = google_compute_network.firezone.id
}
service_account {
email = google_service_account.firezone.email
scopes = ["cloud-platform"]
}
}
// Allow SSH access to the gateways. This is optional but helpful for debugging
// and administration of the gateways. Since they're not publicly accessible,
// you need to tunnel through IAP:
//
// gcloud compute ssh --tunnel-through-iap --project <PROJECT_ID> --zone <ZONE> gateway-0
resource "google_compute_firewall" "ssh-rule" {
name = "allow-ssh"
network = google_compute_network.firezone.name
allow {
protocol = "tcp"
ports = ["22"]
}
target_tags = ["gateway"]
source_ranges = ["0.0.0.0/0"]
}
resource "google_compute_instance_from_template" "gateway" {
name = "gateway-${count.index}"
count = var.replicas
source_instance_template = google_compute_instance_template.gateway.self_link_unique
# Script is defined here to set instance-specific metadata
metadata_startup_script = <<-SCRIPT
#!/usr/bin/env bash
set -euo pipefail
# Install dependencies
sudo apt-get update
sudo apt-get install -y iptables curl
# Set necessary environment variables and run installer
FIREZONE_ID="gateway-${google_compute_instance_template.gateway.id}-${count.index}" \
FIREZONE_TOKEN="${var.token}" \
bash <(curl -fsSL https://raw.githubusercontent.com/firezone/firezone/main/scripts/gateway-systemd-install.sh)
SCRIPT
}

View File

@@ -4,6 +4,9 @@ In this example, we will deploy one or more Firezone Gateways in a single VPC on
Google Cloud Platform (GCP) that are configured to egress traffic through a
single Cloud NAT that is assigned a single static IP address.
This example is built on top of
[Google Cloud Gateway via Regional Instance Group](../../../modules/google-cloud/apps/gateway-region-instance-group/).
## Common use cases
Use this guide to give your Firezone Clients a static public IP address for
@@ -90,11 +93,12 @@ Firezone Gateway(s).
This will incur about a minute or two of downtime as Terraform destroys the
existing Firezone Gateway(s) and deploys new ones in their place.
## Future improvements
## Output
- Minimal downtime upgrades using a regional instance manager.
- Expose the ability to select an existing subnetwork instead of auto-creating a
new one.
- Expose the ability to select an existing
[regional external IP address](https://cloud.google.com/nat/docs/ports-and-addresses)
instead of auto-creating a new one.
`static_ip_addresses` will contain a list of static IP addresses that you can
use to whitelist your Firezone Gateway(s) in your third-party or partner
application.
## Cleanup
To clean up the resources created by this example, run `terraform destroy`.

View File

@@ -0,0 +1,40 @@
# Google provider configuration: supplies the project and region used by
# resources in this example that do not set their own.
provider "google" {
  region  = var.region
  project = var.project_id
}
# Enable the Compute Engine API (compute.googleapis.com) in the target project.
resource "google_project_service" "compute-api" {
  service = "compute.googleapis.com"
  project = var.project_id
}
# Service account for the Firezone Gateway.
# NOTE(review): no other resource visible in this example references this
# account — confirm whether it is still required.
resource "google_service_account" "firezone" {
  display_name = "Firezone Gateway Service Account"
  account_id   = "firezone-gateway"
}
# Firezone Gateways deployed through the regional instance group module.
module "gateways" {
  # When changing this example together with the module, point at the local
  # checkout instead:
  # source = "../../../modules/google-cloud/apps/gateway-region-instance-group"
  source = "github.com/firezone/firezone/terraform/modules/google-cloud/apps/gateway-region-instance-group"

  project_id = var.project_id
  token      = var.token

  image_tag               = "1.0"
  observability_log_level = "info"

  # Placement: region, network and subnetwork the instances are attached to.
  compute_region     = var.region
  compute_network    = google_compute_network.firezone.id
  compute_subnetwork = google_compute_subnetwork.firezone.id

  # Sizing of the instance group.
  compute_instance_type     = var.machine_type
  compute_instance_replicas = var.replicas

  # The instances sit behind a NAT gateway, so public IP addresses do not
  # need to be provisioned for them automatically.
  compute_provision_public_ipv4_address = false
  compute_provision_public_ipv6_address = false
}

View File

@@ -0,0 +1,103 @@
# We create a new network and subnetwork. In real-world scenarios,
# you would likely use existing ones where your application is deployed.
# VPC network for the gateways. Subnetworks are not auto-created (one is
# declared explicitly) and ULA internal IPv6 is enabled.
resource "google_compute_network" "firezone" {
  name = "firezone-gateway"

  enable_ula_internal_ipv6 = true
  auto_create_subnetworks  = false

  # Wait for the Compute API service resource before creating the network.
  depends_on = [google_project_service.compute-api]
}
# Dual-stack subnetwork in which the gateway instances are placed.
resource "google_compute_subnetwork" "firezone" {
  name    = "firezone-gateways"
  project = var.project_id
  region  = var.region
  network = google_compute_network.firezone.id

  # IPv4 range comes from the caller; IPv6 access is internal only.
  ip_cidr_range    = var.subnet_cidr
  stack_type       = "IPV4_IPV6"
  ipv6_access_type = "INTERNAL"

  private_ip_google_access = true
}
# Allocate IPv4 addresses for the NAT gateway
# Reserved IPv4 address that is handed to the Cloud NAT below and exported
# through the `static_ip_addresses` output.
resource "google_compute_address" "ipv4" {
  name       = "firezone-gateway-nat-ipv4"
  project    = var.project_id
  ip_version = "IPV4"
}
# Create a router and NAT to allow outbound traffic
# Cloud Router on the gateway network; the NAT configuration attaches to it.
resource "google_compute_router" "firezone" {
  network = google_compute_network.firezone.id
  name    = "firezone-gateway-router"
}
# Cloud NAT that uses only the manually reserved address and applies to all
# IP ranges of the gateway subnetwork.
resource "google_compute_router_nat" "firezone" {
  name   = "firezone-gateway-nat"
  router = google_compute_router.firezone.name

  # Use the reserved address instead of automatically allocated ones.
  nat_ip_allocate_option = "MANUAL_ONLY"
  nat_ips                = [google_compute_address.ipv4.self_link]

  # NAT only the subnetwork listed below.
  source_subnetwork_ip_ranges_to_nat = "LIST_OF_SUBNETWORKS"

  subnetwork {
    name                    = google_compute_subnetwork.firezone.id
    source_ip_ranges_to_nat = ["ALL_IP_RANGES"]
  }
}
# Configure Firewall to allow outbound traffic
# Allow all outbound IPv4 traffic from instances carrying the gateway
# module's target tags.
resource "google_compute_firewall" "gateways-egress-ipv4" {
  name    = "firezone-gateways-egress-ipv4"
  project = var.project_id
  network = google_compute_network.firezone.id

  direction          = "EGRESS"
  destination_ranges = ["0.0.0.0/0"]
  target_tags        = module.gateways.target_tags

  allow {
    protocol = "all"
  }
}
# Allow all outbound IPv6 traffic from instances carrying the gateway
# module's target tags.
resource "google_compute_firewall" "gateways-egress-ipv6" {
  name    = "firezone-gateways-egress-ipv6"
  project = var.project_id
  network = google_compute_network.firezone.id

  direction          = "EGRESS"
  destination_ranges = ["::/0"]
  target_tags        = module.gateways.target_tags

  allow {
    protocol = "all"
  }
}
# Allow SSH access to the gateways. This is optional but helpful for debugging
# and administration of the gateways. Since they're not publicly accessible,
# you need to tunnel through IAP:
#
# gcloud compute instances list --project <PROJECT_ID>
# gcloud compute ssh --tunnel-through-iap --project <PROJECT_ID> gateway-XXXX
# Ingress rule that lets SSH (tcp/22) reach instances carrying the gateway
# module's target tags, restricted to the IAP source range.
# `project` and `direction` are spelled out to match the egress firewall
# rules in this file; both equal the values that applied implicitly
# (provider project, INGRESS default).
resource "google_compute_firewall" "ssh-rule" {
  project = var.project_id

  name    = "allow-gateways-ssh"
  network = google_compute_network.firezone.id

  direction     = "INGRESS"
  target_tags   = module.gateways.target_tags
  source_ranges = ["35.235.240.0/20"] # IAP CIDR

  allow {
    protocol = "tcp"
    ports    = ["22"]
  }
}

View File

Before

Width:  |  Height:  |  Size: 127 KiB

After

Width:  |  Height:  |  Size: 127 KiB

View File

@@ -0,0 +1,3 @@
# Exposes the address reserved for the Cloud NAT as a single-element list.
output "static_ip_addresses" {
  description = "Static IP addresses used by the NAT gateway for traffic egressing from the Firezone Gateway(s)."
  value       = [google_compute_address.ipv4.address]
}

View File

@@ -16,11 +16,6 @@ variable "region" {
description = "Region to deploy the Gateway(s) in."
}
variable "zone" {
type = string
description = "Availability to deploy the Gateway(s) in."
}
variable "replicas" {
type = number
description = "Number of Gateway replicas to deploy in the availability zone."
@@ -52,3 +47,8 @@ variable "token" {
type = string
description = "Gateway token to use for authentication."
}
# Required input (no default); consumed as the subnetwork's `ip_cidr_range`.
variable "subnet_cidr" {
  description = "CIDR Range to use for subnet where Gateway(s) are deployed"
  type        = string
}

View File

@@ -0,0 +1,8 @@
# Provider requirements for this example: the Google provider from the
# hashicorp namespace, constrained to version "5.20".
terraform {
  required_providers {
    google = {
      version = "5.20"
      source  = "hashicorp/google"
    }
  }
}

View File

@@ -1,6 +1,11 @@
data "google_compute_zones" "in_region" {
project = var.project_id
region = var.compute_region
}
locals {
application_name = var.application_name != null ? var.application_name : var.image
application_version = var.application_version != null ? var.application_version : var.image_tag
application_version = var.application_version != null ? var.application_version : replace(var.image_tag, ".", "-")
application_labels = merge({
managed_by = "terraform"
@@ -11,7 +16,7 @@ locals {
google_health_check_ip_ranges = [
"130.211.0.0/22",
"35.191.0.0/16"
"35.191.0.0/16",
]
environment_variables = concat([
@@ -52,6 +57,8 @@ locals {
value = "1"
}
], var.application_environment_variables)
compute_region_zones = length(var.compute_instance_availability_zones) == 0 ? data.google_compute_zones.in_region.names : var.compute_instance_availability_zones
}
# Fetch most recent COS image
@@ -96,14 +103,22 @@ resource "google_compute_instance_template" "application" {
stack_type = "IPV4_IPV6"
ipv6_access_config {
network_tier = "PREMIUM"
# Ephimerical IP address
dynamic "ipv6_access_config" {
for_each = var.compute_provision_public_ipv6_address == true ? [true] : []
content {
network_tier = "PREMIUM"
# Ephemeral IP address
}
}
access_config {
network_tier = "PREMIUM"
# Ephimerical IP address
dynamic "access_config" {
for_each = var.compute_provision_public_ipv4_address == true ? [true] : []
content {
network_tier = "PREMIUM"
# Ephemeral IP address
}
}
}
@@ -165,33 +180,33 @@ resource "google_compute_instance_template" "application" {
}
}
# # Create health checks for the application ports
# resource "google_compute_health_check" "port" {
# project = var.project_id
# Create health check
resource "google_compute_health_check" "port" {
project = var.project_id
# name = "${local.application_name}-${var.health_check.name}"
name = "${local.application_name}-${var.health_check.name}"
# check_interval_sec = var.health_check.check_interval_sec != null ? var.health_check.check_interval_sec : 5
# timeout_sec = var.health_check.timeout_sec != null ? var.health_check.timeout_sec : 5
# healthy_threshold = var.health_check.healthy_threshold != null ? var.health_check.healthy_threshold : 2
# unhealthy_threshold = var.health_check.unhealthy_threshold != null ? var.health_check.unhealthy_threshold : 2
check_interval_sec = var.health_check.check_interval_sec != null ? var.health_check.check_interval_sec : 5
timeout_sec = var.health_check.timeout_sec != null ? var.health_check.timeout_sec : 5
healthy_threshold = var.health_check.healthy_threshold != null ? var.health_check.healthy_threshold : 2
unhealthy_threshold = var.health_check.unhealthy_threshold != null ? var.health_check.unhealthy_threshold : 2
# log_config {
# enable = false
# }
log_config {
enable = false
}
# http_health_check {
# port = var.health_check.port
http_health_check {
port = var.health_check.port
# host = var.health_check.http_health_check.host
# request_path = var.health_check.http_health_check.request_path
# response = var.health_check.http_health_check.response
# }
host = var.health_check.http_health_check.host
request_path = var.health_check.http_health_check.request_path
response = var.health_check.http_health_check.response
}
# lifecycle {
# create_before_destroy = true
# }
# }
lifecycle {
create_before_destroy = true
}
}
# Use template to deploy regional instance group
resource "google_compute_region_instance_group_manager" "application" {
@@ -202,7 +217,7 @@ resource "google_compute_region_instance_group_manager" "application" {
base_instance_name = local.application_name
region = var.compute_region
distribution_policy_zones = var.compute_instance_availability_zones
distribution_policy_zones = local.compute_region_zones
target_size = var.compute_instance_replicas
@@ -214,23 +229,18 @@ resource "google_compute_region_instance_group_manager" "application" {
instance_template = google_compute_instance_template.application.self_link
}
# named_port {
# name = "stun"
# port = 3478
# }
auto_healing_policies {
initial_delay_sec = var.health_check.initial_delay_sec
# auto_healing_policies {
# initial_delay_sec = var.health_check.initial_delay_sec
# health_check = google_compute_health_check.port.self_link
# }
health_check = google_compute_health_check.port.self_link
}
update_policy {
type = "PROACTIVE"
minimal_action = "REPLACE"
max_unavailable_fixed = 1
max_surge_fixed = max(1, var.compute_instance_replicas - 1)
max_unavailable_fixed = max(1, length(local.compute_region_zones))
max_surge_fixed = max(1, var.compute_instance_replicas - 1) + length(local.compute_region_zones)
}
timeouts {
@@ -244,18 +254,18 @@ resource "google_compute_region_instance_group_manager" "application" {
]
}
# ## Open metrics port for the health checks
# resource "google_compute_firewall" "http-health-checks" {
# project = var.project_id
## Open HTTP port for the health checks
resource "google_compute_firewall" "http-health-checks" {
project = var.project_id
# name = "${local.application_name}-healthcheck"
# network = var.compute_network
name = "${local.application_name}-healthcheck"
network = var.compute_network
# source_ranges = local.google_health_check_ip_ranges
# target_tags = ["app-${local.application_name}"]
source_ranges = local.google_health_check_ip_ranges
target_tags = local.application_tags
# allow {
# protocol = var.health_check.protocol
# ports = [var.health_check.port]
# }
# }
allow {
protocol = var.health_check.protocol
ports = [var.health_check.port]
}
}

View File

@@ -107,7 +107,7 @@ write_files:
TimeoutStartSec=0
Restart=always
ExecStartPre=/usr/bin/docker pull ${container_image}
ExecStart=/bin/sh -c 'docker run --rm --name=${container_name} --cap-add=NET_ADMIN --volume /etc/firezone --device="/dev/net/tun:/dev/net/tun" --env FIREZONE_NAME=$(hostname) --env FIREZONE_ID=$(echo $RANDOM$(hostname) | md5sum | head -c 20; echo;) --env-file="/etc/firezone-gateway/.env" ${container_image}'
ExecStart=/bin/sh -c 'docker run --rm --name=${container_name} --cap-add=NET_ADMIN --publish=8080:8080 --volume /etc/firezone --device="/dev/net/tun:/dev/net/tun" --env FIREZONE_NAME=$(hostname) --env FIREZONE_ID=$(echo $RANDOM$(hostname) | md5sum | head -c 20; echo;) --env-file="/etc/firezone-gateway/.env" ${container_image}'
ExecStop=/usr/bin/docker stop gateway
ExecStopPost=/usr/bin/docker rm gateway

View File

@@ -21,7 +21,8 @@ variable "compute_region" {
variable "compute_instance_availability_zones" {
type = list(string)
description = "List of zones in the region defined in `compute_region` where replicas should be deployed."
default = []
description = "List of zones in the region defined in `compute_region` where replicas should be deployed. Empty list means that all available zones will be used."
}
variable "compute_instance_replicas" {
@@ -32,6 +33,18 @@ variable "compute_instance_type" {
type = string
}
# Toggle read by the instance template's dynamic `access_config` block;
# enabled by default.
variable "compute_provision_public_ipv4_address" {
  description = "Whether to provision public IPv4 address for the instances."
  type        = bool
  default     = true
}
# Toggle read by the instance template's dynamic `ipv6_access_config` block;
# enabled by default.
# The description previously said "IPv4" (copied from the sibling variable).
variable "compute_provision_public_ipv6_address" {
  type        = bool
  default     = true
  description = "Whether to provision public IPv6 address for the instances."
}
################################################################################
## Container Registry
################################################################################
@@ -39,6 +52,7 @@ variable "compute_instance_type" {
variable "container_registry" {
type = string
nullable = false
default = "ghcr.io"
description = "Container registry URL to pull the image from."
}
@@ -49,6 +63,7 @@ variable "container_registry" {
variable "image_repo" {
type = string
nullable = false
default = "firezone"
description = "Repo of a container image used to deploy the application."
}
@@ -56,6 +71,7 @@ variable "image_repo" {
variable "image" {
type = string
nullable = false
default = "gateway"
description = "Container image used to deploy the application."
}
@@ -86,7 +102,7 @@ variable "observability_log_level" {
variable "application_name" {
type = string
nullable = true
default = null
default = "gateway"
description = "Name of the application. Defaults to value of `var.image_name` with `_` replaced to `-`."
}
@@ -129,6 +145,23 @@ variable "health_check" {
nullable = false
default = {
name = "health"
protocol = "TCP"
port = 8080
initial_delay_sec = 60
check_interval_sec = 15
timeout_sec = 10
healthy_threshold = 1
unhealthy_threshold = 3
http_health_check = {
request_path = "/healthz"
}
}
description = "Health check which will be used for auto healing policy."
}

View File

@@ -447,7 +447,7 @@ resource "google_compute_firewall" "egress-ipv4" {
destination_ranges = ["0.0.0.0/0"]
allow {
protocol = "udp"
protocol = "all"
}
}
@@ -462,6 +462,6 @@ resource "google_compute_firewall" "egress-ipv6" {
destination_ranges = ["::/0"]
allow {
protocol = "udp"
protocol = "all"
}
}