diff --git a/CHANGES.md b/CHANGES.md
index 71da2372..265bd8af 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -14,6 +14,14 @@ Notable changes between versions.
 * Fix `worker_node_labels` on Fedora CoreOS ([#651](https://github.com/poseidon/typhoon/pull/651))
 * Fix automatic worker node delete on shutdown on Fedora CoreOS ([#657](https://github.com/poseidon/typhoon/pull/657))
 
+#### Azure
+
+* Upgrade to `terraform-provider-azurerm` [v2.0+](https://www.terraform.io/docs/providers/azurerm/guides/2.0-upgrade-guide.html) (action required)
+  * Switch to Azure's new Linux VM and Linux VM Scale Set resources
+  * If set, change `worker_priority` from `Low` to `Spot` (action required)
+  * Set controller's Azure disk caching to None
+  * Associate subnets (in addition to NICs) with security groups (aesthetic)
+
 #### Google Cloud
 
 * Fix `worker_node_labels` on Fedora CoreOS ([#651](https://github.com/poseidon/typhoon/pull/651))
diff --git a/azure/container-linux/kubernetes/controllers.tf b/azure/container-linux/kubernetes/controllers.tf
index 6012ad33..ea45c93a 100644
--- a/azure/container-linux/kubernetes/controllers.tf
+++ b/azure/container-linux/kubernetes/controllers.tf
@@ -32,92 +32,52 @@ resource "azurerm_availability_set" "controllers" {
 }
 
 # Controller instances
-resource "azurerm_virtual_machine" "controllers" {
+resource "azurerm_linux_virtual_machine" "controllers" {
   count = var.controller_count
   resource_group_name = azurerm_resource_group.cluster.name
 
   name                = "${var.cluster_name}-controller-${count.index}"
   location            = var.region
   availability_set_id = azurerm_availability_set.controllers.id
-  vm_size             = var.controller_type
 
-  # boot
-  storage_image_reference {
+  size        = var.controller_type
+  custom_data = base64encode(data.ct_config.controller-ignitions.*.rendered[count.index])
+
+  # storage
+  os_disk {
+    name                 = "${var.cluster_name}-controller-${count.index}"
+    caching              = "None"
+    disk_size_gb         = var.disk_size
+    storage_account_type = "Premium_LRS"
+  }
+
+  source_image_reference {
     publisher = "CoreOS"
     offer     = "CoreOS"
     sku       = local.channel
     version   = "latest"
   }
 
-  # storage
-  storage_os_disk {
-    name              = "${var.cluster_name}-controller-${count.index}"
-    create_option     = "FromImage"
-    caching           = "ReadWrite"
-    disk_size_gb      = var.disk_size
-    os_type           = "Linux"
-    managed_disk_type = "Premium_LRS"
-  }
-
   # network
-  network_interface_ids = [azurerm_network_interface.controllers.*.id[count.index]]
+  network_interface_ids = [
+    azurerm_network_interface.controllers.*.id[count.index]
+  ]
 
-  os_profile {
-    computer_name  = "${var.cluster_name}-controller-${count.index}"
-    admin_username = "core"
-    custom_data    = data.ct_config.controller-ignitions.*.rendered[count.index]
+  # Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
+  admin_username = "core"
+  admin_ssh_key {
+    username   = "core"
+    public_key = var.ssh_authorized_key
   }
 
-  # Azure mandates setting an ssh_key, even though Ignition custom_data handles it too
-  os_profile_linux_config {
-    disable_password_authentication = true
-
-    ssh_keys {
-      path     = "/home/core/.ssh/authorized_keys"
-      key_data = var.ssh_authorized_key
-    }
-  }
-
-  # lifecycle
-  delete_os_disk_on_termination    = true
-  delete_data_disks_on_termination = true
-
   lifecycle {
     ignore_changes = [
-      storage_os_disk,
-      os_profile,
+      os_disk,
+      custom_data,
     ]
   }
 }
 
-# Controller NICs with public and private IPv4
-resource "azurerm_network_interface" "controllers" {
-  count = var.controller_count
-  resource_group_name = azurerm_resource_group.cluster.name
-
-  name                      = "${var.cluster_name}-controller-${count.index}"
-  location                  = azurerm_resource_group.cluster.location
-  network_security_group_id = azurerm_network_security_group.controller.id
-
-  ip_configuration {
-    name                          = "ip0"
-    subnet_id                     = azurerm_subnet.controller.id
-    private_ip_address_allocation = "dynamic"
-
-    # public IPv4
-    public_ip_address_id = azurerm_public_ip.controllers.*.id[count.index]
-  }
-}
-
-# Add controller NICs to the controller backend address pool
-resource "azurerm_network_interface_backend_address_pool_association" "controllers" {
-  count = var.controller_count
-
-  network_interface_id    = azurerm_network_interface.controllers[count.index].id
-  ip_configuration_name   = "ip0"
-  backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id
-}
-
 # Controller public IPv4 addresses
 resource "azurerm_public_ip" "controllers" {
   count = var.controller_count
@@ -129,6 +89,40 @@ resource "azurerm_public_ip" "controllers" {
   allocation_method = "Static"
 }
 
+# Controller NICs with public and private IPv4
+resource "azurerm_network_interface" "controllers" {
+  count = var.controller_count
+  resource_group_name = azurerm_resource_group.cluster.name
+
+  name     = "${var.cluster_name}-controller-${count.index}"
+  location = azurerm_resource_group.cluster.location
+
+  ip_configuration {
+    name                          = "ip0"
+    subnet_id                     = azurerm_subnet.controller.id
+    private_ip_address_allocation = "Dynamic"
+    # instance public IPv4
+    public_ip_address_id = azurerm_public_ip.controllers.*.id[count.index]
+  }
+}
+
+# Associate controller network interface with controller security group
+resource "azurerm_network_interface_security_group_association" "controllers" {
+  count = var.controller_count
+
+  network_interface_id      = azurerm_network_interface.controllers[count.index].id
+  network_security_group_id = azurerm_network_security_group.controller.id
+}
+
+# Associate controller network interface with controller backend address pool
+resource "azurerm_network_interface_backend_address_pool_association" "controllers" {
+  count = var.controller_count
+
+  network_interface_id    = azurerm_network_interface.controllers[count.index].id
+  ip_configuration_name   = "ip0"
+  backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id
+}
+
 # Controller Ignition configs
 data "ct_config" "controller-ignitions" {
   count = var.controller_count
diff --git a/azure/container-linux/kubernetes/network.tf b/azure/container-linux/kubernetes/network.tf
index aa9157b0..ea92a5a7 100644
--- a/azure/container-linux/kubernetes/network.tf
+++ b/azure/container-linux/kubernetes/network.tf
@@ -24,6 +24,11 @@ resource "azurerm_subnet" "controller" {
   address_prefix       = cidrsubnet(var.host_cidr, 1, 0)
 }
 
+resource "azurerm_subnet_network_security_group_association" "controller" {
+  subnet_id                 = azurerm_subnet.controller.id
+  network_security_group_id = azurerm_network_security_group.controller.id
+}
+
 resource "azurerm_subnet" "worker" {
   resource_group_name = azurerm_resource_group.cluster.name
 
@@ -32,3 +37,8 @@ resource "azurerm_subnet" "worker" {
   address_prefix       = cidrsubnet(var.host_cidr, 1, 1)
 }
 
+resource "azurerm_subnet_network_security_group_association" "worker" {
+  subnet_id                 = azurerm_subnet.worker.id
+  network_security_group_id = azurerm_network_security_group.worker.id
+}
+
diff --git a/azure/container-linux/kubernetes/ssh.tf b/azure/container-linux/kubernetes/ssh.tf
index 5ff47b35..04f54393 100644
--- a/azure/container-linux/kubernetes/ssh.tf
+++ b/azure/container-linux/kubernetes/ssh.tf
@@ -13,7 +13,7 @@ resource "null_resource" "copy-controller-secrets" {
 
   depends_on = [
     module.bootstrap,
-    azurerm_virtual_machine.controllers
+    azurerm_linux_virtual_machine.controllers
   ]
 
   connection {
diff --git a/azure/container-linux/kubernetes/versions.tf b/azure/container-linux/kubernetes/versions.tf
index 098f8194..f9653cab 100644
--- a/azure/container-linux/kubernetes/versions.tf
+++ b/azure/container-linux/kubernetes/versions.tf
@@ -3,7 +3,7 @@ terraform {
   required_version = "~> 0.12.6"
 
   required_providers {
-    azurerm  = "~> 1.27"
+    azurerm  = "~> 2.0"
     ct       = "~> 0.3"
     template = "~> 2.1"
     null     = "~> 2.1"
diff --git a/azure/container-linux/kubernetes/workers.tf b/azure/container-linux/kubernetes/workers.tf
index 11b77c50..a99db98d 100644
--- a/azure/container-linux/kubernetes/workers.tf
+++ b/azure/container-linux/kubernetes/workers.tf
@@ -22,4 +22,3 @@ module "workers" {
   clc_snippets = var.worker_clc_snippets
   node_labels  = var.worker_node_labels
 }
-
diff --git a/azure/container-linux/kubernetes/workers/workers.tf b/azure/container-linux/kubernetes/workers/workers.tf
index 7acdf63e..ea41a0ca 100644
--- a/azure/container-linux/kubernetes/workers/workers.tf
+++ b/azure/container-linux/kubernetes/workers/workers.tf
@@ -5,53 +5,40 @@ locals {
 }
 
 # Workers scale set
-resource "azurerm_virtual_machine_scale_set" "workers" {
+resource "azurerm_linux_virtual_machine_scale_set" "workers" {
   resource_group_name = var.resource_group_name
 
-  name                   = "${var.name}-workers"
+  name                   = "${var.name}-worker"
   location               = var.region
+  sku                    = var.vm_type
+  instances              = var.worker_count
+  # instance name prefix for instances in the set
+  computer_name_prefix   = "${var.name}-worker"
   single_placement_group = false
+  custom_data            = base64encode(data.ct_config.worker-ignition.rendered)
 
-  sku {
-    name     = var.vm_type
-    tier     = "standard"
-    capacity = var.worker_count
+  # storage
+  os_disk {
+    storage_account_type = "Standard_LRS"
+    caching              = "ReadWrite"
   }
 
-  # boot
-  storage_profile_image_reference {
+  source_image_reference {
     publisher = "CoreOS"
     offer     = "CoreOS"
     sku       = local.channel
     version   = "latest"
   }
 
-  # storage
-  storage_profile_os_disk {
-    create_option     = "FromImage"
-    caching           = "ReadWrite"
-    os_type           = "linux"
-    managed_disk_type = "Standard_LRS"
-  }
-
-  os_profile {
-    computer_name_prefix = "${var.name}-worker-"
-    admin_username       = "core"
-    custom_data          = data.ct_config.worker-ignition.rendered
-  }
-
-  # Azure mandates setting an ssh_key, even though Ignition custom_data handles it too
-  os_profile_linux_config {
-    disable_password_authentication = true
-
-    ssh_keys {
-      path     = "/home/core/.ssh/authorized_keys"
-      key_data = var.ssh_authorized_key
-    }
+  # Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
+  admin_username = "core"
+  admin_ssh_key {
+    username   = "core"
+    public_key = var.ssh_authorized_key
   }
 
   # network
-  network_profile {
+  network_interface {
     name                      = "nic0"
     primary                   = true
     network_security_group_id = var.security_group_id
@@ -67,10 +54,10 @@ resource "azurerm_virtual_machine_scale_set" "workers" {
   }
 
   # lifecycle
-  upgrade_policy_mode = "Manual"
-  # eviction policy may only be set when priority is Low
+  upgrade_mode = "Manual"
+  # eviction policy may only be set when priority is Spot
   priority        = var.priority
-  eviction_policy = var.priority == "Low" ? "Delete" : null
+  eviction_policy = var.priority == "Spot" ? "Delete" : null
 }
 
 # Scale up or down to maintain desired number, tolerating deallocations.
@@ -82,7 +69,7 @@ resource "azurerm_monitor_autoscale_setting" "workers" {
 
   # autoscale
   enabled            = true
-  target_resource_id = azurerm_virtual_machine_scale_set.workers.id
+  target_resource_id = azurerm_linux_virtual_machine_scale_set.workers.id
 
   profile {
     name = "default"
diff --git a/docs/cl/azure.md b/docs/cl/azure.md
index 3bf311c9..0ece179a 100644
--- a/docs/cl/azure.md
+++ b/docs/cl/azure.md
@@ -50,7 +50,7 @@ Configure the Azure provider in a `providers.tf` file.
 
 ```tf
 provider "azurerm" {
-  version = "1.43.0"
+  version = "2.0.0"
 }
 
 provider "ct" {
@@ -225,7 +225,7 @@ Reference the DNS zone with `azurerm_dns_zone.clusters.name` and its resource gr
 | worker_type | Machine type for workers | "Standard_DS1_v2" | See below |
 | os_image | Channel for a Container Linux derivative | "coreos-stable" | coreos-stable, coreos-beta, coreos-alpha |
 | disk_size | Size of the disk in GB | 40 | 100 |
-| worker_priority | Set priority to Low to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time | Regular | Low |
+| worker_priority | Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time | Regular | Spot |
 | controller_clc_snippets | Controller Container Linux Config snippets | [] | [example](/advanced/customization/#usage) |
 | worker_clc_snippets | Worker Container Linux Config snippets | [] | [example](/advanced/customization/#usage) |
 | networking | Choice of networking provider | "calico" | "flannel" or "calico" |
@@ -242,6 +242,6 @@ Check the list of valid [machine types](https://azure.microsoft.com/en-us/pricin
 !!! warning
     Do not choose a `controller_type` smaller than `Standard_B2s`. Smaller instances are not sufficient for running a controller.
 
-#### Low Priority
+#### Spot Priority
 
-Add `worker_priority=Low` to use [Low Priority](https://docs.microsoft.com/en-us/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-use-low-priority) workers that run on Azure's surplus capacity at lower cost, but with the tradeoff that they can be deallocated at random. Low priority VMs are Azure's analog to AWS spot instances or GCP premptible instances.
+Add `worker_priority=Spot` to use [Spot Priority](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/spot-vms) workers that run on Azure's surplus capacity at lower cost, but with the tradeoff that they can be deallocated at random. Spot priority VMs are Azure's analog to AWS spot instances or GCP preemptible instances.
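
For module users, the action-required items in the CHANGES.md entry above amount to a small config update. A minimal sketch, assuming a hypothetical cluster config: the module label, the `ref` placeholder, and the omitted required variables are illustrative and not part of this diff, and the azurerm 2.x provider additionally expects an empty `features {}` block.

```tf
# Sketch only: "azure-cluster" and the ref are placeholders; required variables
# (cluster_name, region, DNS zone, ssh_authorized_key, etc.) are omitted.
provider "azurerm" {
  version = "2.0.0"
  # azurerm v2.x requires an (empty) features block
  features {}
}

module "azure-cluster" {
  source = "git::https://github.com/poseidon/typhoon//azure/container-linux/kubernetes?ref=<release-with-azurerm-v2.0>"

  # ...required cluster variables omitted...

  # was "Low" with azurerm v1.x; scale sets take "Spot" with v2.0+
  worker_priority = "Spot"
}
```

Clusters that previously set `worker_priority = "Low"` must switch the value to `"Spot"` when adopting the v2.0 provider, per the changelog entry above.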