locals {
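  # Default the application name and version to the container image name and tag
  # when they are not set explicitly.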
  application_name    = var.application_name != null ? var.application_name : var.image
  application_version = var.application_version != null ? var.application_version : var.image_tag

  application_labels = merge({
    managed_by = "terraform"

    # Note: these labels are used to fetch the release name for the Erlang Cluster
    application = local.application_name
  }, var.application_labels)

  application_environment_variables = concat([
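    # Discover Erlang release peers via the GCE metadata server (this env var is
    # presumably read by the release's host-discovery scripts).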
    {
      name  = "RELEASE_HOST_DISCOVERY_METHOD"
      value = "gce_metadata"
    },
    {
      name  = "PHOENIX_EXTERNAL_TRUSTED_PROXIES"
      value = jsonencode(concat(
        [
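          # Google Cloud load balancer / health check source ranges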
          "35.191.0.0/16",
          "130.211.0.0/22"
        ],
        google_compute_global_address.ipv4[*].address,
        google_compute_global_address.ipv6[*].address
      ))
    },
    {
      name  = "LOG_LEVEL"
      value = var.observability_log_level
    },
    {
      name  = "OTLP_ENDPOINT"
      value = "http://localhost:4318"
    },
    {
      name  = "OTEL_RESOURCE_ATTRIBUTES"
      value = "application.name=${local.application_name}"
    },
    {
      name  = "TELEMETRY_METRICS_REPORTER"
      value = "Elixir.Domain.Telemetry.GoogleCloudMetricsReporter"
    },
    {
      name = "TELEMETRY_METRICS_REPORTER_OPTS"
      value = jsonencode({
        project_id = var.project_id
      })
    },
    {
      name  = "LOGGER_FORMATTER"
      value = "Elixir.LoggerJSON.Formatters.GoogleCloud"
    },
    {
      name = "LOGGER_FORMATTER_OPTS"
      value = jsonencode({
        project_id = var.project_id
      })
    },
    {
      name  = "PLATFORM_ADAPTER"
      value = "Elixir.Domain.GoogleCloudPlatform"
    },
    {
      name = "PLATFORM_ADAPTER_CONFIG"
      value = jsonencode({
        project_id            = var.project_id
        service_account_email = google_service_account.application.email
      })
    }
  ], var.application_environment_variables)

  application_ports_by_name = { for port in var.application_ports : port.name => port }
}
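
# For reference, the shape this module expects for each var.application_ports
# element (illustrative values, not part of the module; unset attributes may be
# null and fall back to the defaults used below):
#
#   {
#     name = "http"
#     port = 8080
#     health_check = {
#       initial_delay_sec   = 60
#       check_interval_sec  = 5
#       timeout_sec         = 5
#       healthy_threshold   = 2
#       unhealthy_threshold = 2
#       http_health_check = {
#         request_path = "/healthz"
#       }
#     }
#   }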

# Fetch the most recent Container-Optimized OS (COS) image
data "google_compute_image" "coreos" {
  family  = "cos-113-lts"
  project = "cos-cloud"
}

# Reserve instances for the application.
# Without a reservation, deployment takes much longer, and Google Cloud Platform
# does not guarantee that instances will be available when you need them.
resource "google_compute_reservation" "reservation" {
  # for_each = toset(var.compute_instance_availability_zones)

  project = var.project_id

  # name = "${local.application_name}-${each.key}-${var.compute_instance_type}"
  name = "${local.application_name}-${element(var.compute_instance_availability_zones, length(var.compute_instance_availability_zones) - 1)}-${var.compute_instance_type}"

  # zone = each.key
  zone = element(var.compute_instance_availability_zones, length(var.compute_instance_availability_zones) - 1)
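
  # Only VMs that explicitly target this reservation via reservation_affinity
  # (see the instance template below) are allowed to consume it.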
  specific_reservation_required = true

  specific_reservation {
    count = var.scaling_horizontal_replicas
    # count = ceil(var.scaling_horizontal_replicas / length(var.compute_instance_availability_zones))

    instance_properties {
      machine_type = var.compute_instance_type
    }
  }
}

# Deploy the app
resource "google_compute_instance_template" "application" {
  project = var.project_id

  name_prefix = "${local.application_name}-"

  description = "This template is used to create ${local.application_name} instances."

  machine_type = var.compute_instance_type
  region       = var.compute_instance_region

  can_ip_forward = false

  tags = ["app-${local.application_name}"]

  labels = merge({
    container-vm = data.google_compute_image.coreos.name

    # This label can be used by the Erlang Cluster to avoid joining nodes that run older versions
    version = local.application_version
  }, local.application_labels)

  scheduling {
    automatic_restart   = true
    on_host_maintenance = "MIGRATE"
    provisioning_model  = "STANDARD"
  }

  reservation_affinity {
    type = "SPECIFIC_RESERVATION"

    specific_reservation {
      key = "compute.googleapis.com/reservation-name"
      # A *regional* instance group can consume only one reservation, and
      # reservations are zonal by default, so we are locked to one zone per
      # region until Google Cloud Platform fixes that.
      # values = [for r in google_compute_reservation.reservation : r.name]
      values = [google_compute_reservation.reservation.name]
    }
  }

  disk {
    source_image = data.google_compute_image.coreos.self_link
    auto_delete  = true
    boot         = true
    disk_type    = var.compute_boot_disk_type
  }

  network_interface {
    subnetwork = var.vpc_subnetwork
    stack_type = "IPV4_IPV6"

    ipv6_access_config {
      network_tier = "PREMIUM"
    }
  }

  service_account {
    email = google_service_account.application.email

    scopes = concat([
      # These are the default scopes
      "https://www.googleapis.com/auth/devstorage.read_only",
      "https://www.googleapis.com/auth/logging.write",
      "https://www.googleapis.com/auth/monitoring.write",
      "https://www.googleapis.com/auth/service.management.readonly",
      "https://www.googleapis.com/auth/servicecontrol",
      "https://www.googleapis.com/auth/trace.append",
      # Required to discover the other instances in the Erlang Cluster
      "https://www.googleapis.com/auth/compute.readonly"
    ], var.application_token_scopes)
  }

  shielded_instance_config {
    enable_integrity_monitoring = true
    enable_secure_boot          = false
    enable_vtpm                 = true
  }
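
  # The gce-container-declaration metadata key is read by konlet, the container
  # startup agent on Container-Optimized OS, which pulls and runs the container
  # described below.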
  metadata = {
    gce-container-declaration = yamlencode({
      spec = {
        containers = [{
          name  = local.application_name != null ? local.application_name : var.image
          image = "${var.container_registry}/${var.image_repo}/${var.image}:${var.image_tag}"
          env   = local.application_environment_variables
        }]

        volumes = []

        restartPolicy = "Always"
      }
    })

    user-data = templatefile("${path.module}/templates/cloud-init.yaml", {})

    google-logging-enabled       = "true"
    google-logging-use-fluentbit = "true"

    # Report health-related metrics to Cloud Monitoring
    google-monitoring-enabled = "true"
  }

  depends_on = [
    google_project_service.compute,
    google_project_service.pubsub,
    google_project_service.bigquery,
    google_project_service.container,
    google_project_service.stackdriver,
    google_project_service.logging,
    google_project_service.monitoring,
    google_project_service.cloudprofiler,
    google_project_service.cloudtrace,
    google_project_service.servicenetworking,
    google_project_iam_member.artifacts,
    google_project_iam_member.logs,
    google_project_iam_member.errors,
    google_project_iam_member.metrics,
    google_project_iam_member.service_management,
    google_project_iam_member.cloudtrace,
    google_compute_reservation.reservation,
  ]
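
  # Instance templates are immutable; with create_before_destroy and the
  # name_prefix above, Terraform builds the replacement template before
  # destroying the old one.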
  lifecycle {
    create_before_destroy = true
  }
}

# Create health checks for the application ports
resource "google_compute_health_check" "port" {
  for_each = { for port in var.application_ports : port.name => port if try(port.health_check, null) != null }

  project = var.project_id

  name = "${local.application_name}-${each.key}"

  check_interval_sec  = each.value.health_check.check_interval_sec != null ? each.value.health_check.check_interval_sec : 5
  timeout_sec         = each.value.health_check.timeout_sec != null ? each.value.health_check.timeout_sec : 5
  healthy_threshold   = each.value.health_check.healthy_threshold != null ? each.value.health_check.healthy_threshold : 2
  unhealthy_threshold = each.value.health_check.unhealthy_threshold != null ? each.value.health_check.unhealthy_threshold : 2

  log_config {
    enable = false
  }
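
  # The [*] splat below turns a null into an empty list and a non-null object
  # into a single-element list, so each dynamic block renders at most once.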
  dynamic "tcp_health_check" {
    for_each = try(each.value.health_check.tcp_health_check, null)[*]

    content {
      port = each.value.port

      response = lookup(tcp_health_check.value, "response", null)
    }
  }

  dynamic "http_health_check" {
    for_each = try(each.value.health_check.http_health_check, null)[*]

    content {
      port = each.value.port

      host         = lookup(http_health_check.value, "host", null)
      request_path = lookup(http_health_check.value, "request_path", null)
      response     = lookup(http_health_check.value, "response", null)
    }
  }

  dynamic "https_health_check" {
    for_each = try(each.value.health_check.https_health_check, null)[*]

    content {
      port = each.value.port

      host         = lookup(https_health_check.value, "host", null)
      request_path = lookup(https_health_check.value, "request_path", null)
      response     = lookup(https_health_check.value, "response", null)
    }
  }

  depends_on = [
    google_project_service.compute,
    google_project_service.servicenetworking,
  ]
}

# Use the template to deploy a regional instance group
resource "google_compute_region_instance_group_manager" "application" {
  project = var.project_id

  name = "${local.application_name}-group"

  base_instance_name        = local.application_name
  region                    = var.compute_instance_region
  distribution_policy_zones = var.compute_instance_availability_zones

  target_size = var.scaling_horizontal_replicas
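
  # Block `terraform apply` until all instances are created and the group
  # reports a STABLE status.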
  wait_for_instances        = true
  wait_for_instances_status = "STABLE"

  version {
    name              = local.application_version
    instance_template = google_compute_instance_template.application.self_link
  }
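
  # Named ports let load balancer backend services reference application ports
  # by name rather than by number.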
  dynamic "named_port" {
    for_each = var.application_ports

    content {
      name = named_port.value.name
      port = named_port.value.port
    }
  }
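
  # Auto-healing is enabled only when an "http" port with a health check is
  # defined; otherwise try() yields an empty list and the block is omitted.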
  dynamic "auto_healing_policies" {
    for_each = try([google_compute_health_check.port["http"].self_link], [])

    content {
      initial_delay_sec = local.application_ports_by_name["http"].health_check.initial_delay_sec

      health_check = auto_healing_policies.value
    }
  }
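
  # PROACTIVE + REPLACE means the group manager recreates instances on its own
  # whenever a new template version is rolled out.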
  update_policy {
    type           = "PROACTIVE"
    minimal_action = "REPLACE"

    # With reservations we need to take one instance down before provisioning a
    # new one, otherwise we will get an error that there are no available
    # instances for the targeted reservation.
    max_unavailable_fixed = 1
    max_surge_fixed       = max(max(1, var.scaling_horizontal_replicas - 1), length(var.compute_instance_availability_zones))
  }

  timeouts {
    create = "20m"
    update = "30m"
    delete = "20m"
  }

  depends_on = [
    google_compute_instance_template.application
  ]
}

# Auto-scale instances based on CPU utilization
resource "google_compute_region_autoscaler" "application" {
  count = var.scaling_max_horizontal_replicas != null ? 1 : 0

  project = var.project_id

  name = "${local.application_name}-autoscaler"

  region = var.compute_instance_region
  target = google_compute_region_instance_group_manager.application.id

  autoscaling_policy {
    max_replicas = var.scaling_max_horizontal_replicas
    min_replicas = var.scaling_horizontal_replicas

    # Wait 3 minutes before measuring CPU utilization on new instances
    cooldown_period = 180
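
    # Scale out when average CPU utilization across the group exceeds 80%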
    cpu_utilization {
      target = 0.8
    }
  }
}