mirror of
https://github.com/outbackdingo/typhoon.git
synced 2026-01-27 10:20:48 +00:00
Set Azure VMSS upgrade policy to Rolling
* Set a rolling upgrade policy so that changes to the worker node pool are rolled out gradually. Previously, the VMSS model could change, but instances would not receive the new model until they were manually replaced.
* Align Azure node pool behaviors more closely with AWS and GCP:
  * On AWS, worker instance template changes trigger an instance refresh.
  * On GCP, worker instance template changes roll out via a proactive update policy.
* Define Azure automatic instance repair using Application Health Extension probes to port 10256 (kube-proxy or Cilium equivalent) to match the strategy used on Google Cloud.
This commit is contained in:
@@ -8,6 +8,9 @@ Notable changes between versions.
|
||||
|
||||
* Add `enable_http_load_balancing` variable to reduce load balancer rules count
|
||||
* Azure charges by load balancer rules (5 included)
|
||||
* Change Azure VMSS instance update policy (i.e. upgrade policy) from Manual to Rolling
|
||||
* Set a rolling upgrade policy so that changes to the worker node pool are rolled out gradually. Previously, the VMSS model could change, but instances would not receive the new model until they were manually replaced.
|
||||
* Define Azure automatic instance repair using Application Health Extension probes to port 10256 (kube-proxy or Cilium equivalent) to match the strategy used on Google Cloud.
|
||||
|
||||
### Google Cloud
|
||||
|
||||
|
||||
@@ -89,7 +89,7 @@ resource "azurerm_lb_rule" "apiserver-ipv6" {
|
||||
}
|
||||
|
||||
resource "azurerm_lb_rule" "ingress-http-ipv4" {
|
||||
count = var.enable_http_load_balancing ? 1 : 0
|
||||
count = var.enable_http_load_balancing ? 1 : 0
|
||||
|
||||
name = "ingress-http-ipv4"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
|
||||
@@ -146,7 +146,7 @@ EOD
|
||||
|
||||
variable "enable_http_load_balancing" {
|
||||
description = "Enable HTTP (port 80) LB rules"
|
||||
default = false
|
||||
default = false
|
||||
}
|
||||
|
||||
variable "enable_ipv6_load_balancing" {
|
||||
|
||||
@@ -69,6 +69,42 @@ resource "azurerm_orchestrated_virtual_machine_scale_set" "workers" {
|
||||
}
|
||||
}
|
||||
|
||||
# Roll out VMSS changes to instances gradually
|
||||
upgrade_mode = "Rolling"
|
||||
rolling_upgrade_policy {
|
||||
max_batch_instance_percent = 20
|
||||
pause_time_between_batches = "PT2M"
|
||||
|
||||
maximum_surge_instances_enabled = true
|
||||
# Upgrade unhealthy instances first
|
||||
prioritize_unhealthy_instances_enabled = true
|
||||
|
||||
# Safety gate to stop bad rollouts
|
||||
max_unhealthy_instance_percent = 20
|
||||
max_unhealthy_upgraded_instance_percent = 25
|
||||
}
|
||||
|
||||
# Azure instance repair replaces instances that fail probes from the
|
||||
# ApplicationHealthExtension
|
||||
automatic_instance_repair {
|
||||
enabled = true
|
||||
grace_period = "PT15M"
|
||||
action = "Replace"
|
||||
}
|
||||
|
||||
extension {
|
||||
name = "ApplicationHealthExtension"
|
||||
publisher = "Microsoft.ManagedServices"
|
||||
type = "ApplicationHealthLinux"
|
||||
type_handler_version = "1.0"
|
||||
settings = jsonencode({
|
||||
protocol = "http"
|
||||
port = 10256
|
||||
requestPath = "/healthz"
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
# lifecycle
|
||||
# eviction policy may only be set when priority is Spot
|
||||
priority = var.priority
|
||||
|
||||
@@ -89,7 +89,7 @@ resource "azurerm_lb_rule" "apiserver-ipv6" {
|
||||
}
|
||||
|
||||
resource "azurerm_lb_rule" "ingress-http-ipv4" {
|
||||
count = var.enable_http_load_balancing ? 1 : 0
|
||||
count = var.enable_http_load_balancing ? 1 : 0
|
||||
|
||||
name = "ingress-http-ipv4"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
|
||||
@@ -152,7 +152,7 @@ EOD
|
||||
|
||||
variable "enable_http_load_balancing" {
|
||||
description = "Enable HTTP (port 80) LB rules"
|
||||
default = false
|
||||
default = false
|
||||
}
|
||||
|
||||
variable "enable_ipv6_load_balancing" {
|
||||
|
||||
@@ -90,6 +90,42 @@ resource "azurerm_orchestrated_virtual_machine_scale_set" "workers" {
|
||||
}
|
||||
}
|
||||
|
||||
# Roll out VMSS changes to instances gradually
|
||||
upgrade_mode = "Rolling"
|
||||
rolling_upgrade_policy {
|
||||
max_batch_instance_percent = 20
|
||||
pause_time_between_batches = "PT2M"
|
||||
|
||||
maximum_surge_instances_enabled = true
|
||||
# Upgrade unhealthy instances first
|
||||
prioritize_unhealthy_instances_enabled = true
|
||||
|
||||
# Safety gate to stop bad rollouts
|
||||
max_unhealthy_instance_percent = 20
|
||||
max_unhealthy_upgraded_instance_percent = 25
|
||||
}
|
||||
|
||||
# Azure instance repair replaces instances that fail probes from the
|
||||
# ApplicationHealthExtension
|
||||
automatic_instance_repair {
|
||||
enabled = true
|
||||
grace_period = "PT15M"
|
||||
action = "Replace"
|
||||
}
|
||||
|
||||
extension {
|
||||
name = "ApplicationHealthExtension"
|
||||
publisher = "Microsoft.ManagedServices"
|
||||
type = "ApplicationHealthLinux"
|
||||
type_handler_version = "1.0"
|
||||
settings = jsonencode({
|
||||
protocol = "http"
|
||||
port = 10256
|
||||
requestPath = "/healthz"
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
# lifecycle
|
||||
# eviction policy may only be set when priority is Spot
|
||||
priority = var.priority
|
||||
|
||||
Reference in New Issue
Block a user