fix(talos): upgrading now actually works without destroying the cluster

Use only one pve_download_file resource and key it to <node>_<schematic-id>_<version>
Vegard Hagen
2024-07-11 22:38:50 +02:00
parent 29f5ce8b14
commit f39dc98104
6 changed files with 35 additions and 28 deletions
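The change is easiest to see from the key format itself: every Proxmox image download is now identified by one composite string, and split() recovers the parts wherever the node name, schematic ID or version is needed. A minimal sketch of that scheme, using made-up placeholder values rather than anything from this repository:

locals {
  # Hypothetical key following the "<node>_<schematic-id>_<version>" pattern from the
  # commit message; real keys are built from cluster_config and the image factory response.
  example_key = "pve1_abc123def456_v1.7.5"

  # split() recovers the three parts; this assumes the Proxmox node name itself
  # contains no underscore.
  example_node         = split("_", local.example_key)[0]  # "pve1"
  example_schematic_id = split("_", local.example_key)[1]  # "abc123def456"
  example_version      = split("_", local.example_key)[2]  # "v1.7.5"
}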

View File

@@ -6,8 +6,7 @@ module "talos" {
}
talos_image = {
version = "v1.7.5"
update_version = "v1.8.0-alpha.1"
version = "v1.7.5"
schematic = file("${path.module}/config/talos-image-schematic.yaml")
}
cluster_config = var.cluster_config
@@ -18,6 +17,7 @@ module "talos" {
}
module "proxmox_csi_plugin" {
+ depends_on = [module.talos]
source = "./bootstrap/proxmox-csi-plugin"
providers = {
@@ -29,6 +29,7 @@ module "proxmox_csi_plugin" {
}
module "sealed_secrets" {
+ depends_on = [module.talos]
source = "./bootstrap/sealed-secrets"
providers = {
@@ -43,6 +44,7 @@ module "sealed_secrets" {
}
module "volumes" {
+ depends_on = [module.proxmox_csi_plugin]
source = "./bootstrap/volumes"
providers = {

View File

@@ -6,7 +6,7 @@ terraform {
}
proxmox = {
source = "bpg/proxmox"
version = "0.60.0"
version = "0.61.1"
}
talos = {
source = "siderolabs/talos"

View File

@@ -1,14 +1,21 @@
locals {
+ version = var.talos_image.version
+ schematic = var.talos_image.schematic
schematic_id = jsondecode(data.http.schematic_id.response_body)["id"]
- update_schematic = coalesce(var.talos_image.update_schematic, var.talos_image.schematic)
+ url = "${var.talos_image.factory_url}/image/${local.schematic_id}/${local.version}/${var.talos_image.platform}-${var.talos_image.arch}.raw.gz"
+ image_id = "${local.schematic_id}_${local.version}"
update_version = coalesce(var.talos_image.update_version, var.talos_image.version)
+ update_schematic = coalesce(var.talos_image.update_schematic, var.talos_image.schematic)
update_schematic_id = jsondecode(data.http.updated_schematic_id.response_body)["id"]
+ update_url = "${var.talos_image.factory_url}/image/${local.update_schematic_id}/${local.update_version}/${var.talos_image.platform}-${var.talos_image.arch}.raw.gz"
+ update_image_id = "${local.update_schematic_id}_${local.update_version}"
}
data "http" "schematic_id" {
url = "${var.talos_image.factory_url}/schematics"
method = "POST"
- request_body = var.talos_image.schematic
+ request_body = local.schematic
}
data "http" "updated_schematic_id" {
@@ -18,27 +25,14 @@ data "http" "updated_schematic_id" {
}
resource "proxmox_virtual_environment_download_file" "talos_image" {
- for_each = toset(distinct([for k, v in var.cluster_config.nodes : v.host_node]))
+ for_each = toset(distinct([for k, v in var.cluster_config.nodes : "${v.host_node}_${v.update == true ? local.update_image_id : local.image_id}"]))
- node_name = each.key
+ node_name = split("_", each.key)[0]
content_type = "iso"
datastore_id = var.talos_image.proxmox_datastore
file_name = "talos-${local.schematic_id}-${var.talos_image.version}-${var.talos_image.platform}-${var.talos_image.arch}.img"
url = "${var.talos_image.factory_url}/image/${local.schematic_id}/${var.talos_image.version}/${var.talos_image.platform}-${var.talos_image.arch}.raw.gz"
decompression_algorithm = "gz"
overwrite = false
}
resource "proxmox_virtual_environment_download_file" "updated_talos_image" {
for_each = toset(distinct([for k, v in var.cluster_config.nodes : v.host_node if v.update]))
node_name = each.key
content_type = "iso"
datastore_id = var.talos_image.proxmox_datastore
file_name = "talos-update-${local.update_schematic_id}-${local.update_version}-${var.talos_image.platform}-${var.talos_image.arch}.img"
url = "${var.talos_image.factory_url}/image/${local.update_schematic_id}/${var.talos_image.update_version}/${var.talos_image.platform}-${var.talos_image.arch}.raw.gz"
file_name = "talos-${split("_",each.key)[1]}-${split("_", each.key)[2]}-${var.talos_image.platform}-${var.talos_image.arch}.img"
url = "${var.talos_image.factory_url}/image/${split("_", each.key)[1]}/${split("_", each.key)[2]}/${var.talos_image.platform}-${var.talos_image.arch}.raw.gz"
decompression_algorithm = "gz"
overwrite = false
}
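For illustration, a hypothetical evaluation of the new for_each expression (node names, schematic IDs and update flags below are placeholders, not values from this cluster): because each host/schematic/version combination becomes its own key, flipping a node's update flag adds a new download file instead of replacing the one that existing VMs still reference.

locals {
  # Placeholder stand-ins for var.cluster_config.nodes and the image IDs above.
  example_nodes = {
    "ctrl-00" = { host_node = "pve1", update = false }
    "work-00" = { host_node = "pve2", update = true }
  }
  example_image_id        = "abc123def456_v1.7.5"
  example_update_image_id = "abc123def456_v1.8.0"

  # Mirrors the for_each expression above and evaluates to
  # ["pve1_abc123def456_v1.7.5", "pve2_abc123def456_v1.8.0"].
  example_keys = distinct([
    for k, v in local.example_nodes :
    "${v.host_node}_${v.update == true ? local.example_update_image_id : local.example_image_id}"
  ])
}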

View File

@@ -47,11 +47,14 @@ resource "talos_machine_configuration_apply" "talos_config_apply" {
resource "talos_machine_bootstrap" "talos_bootstrap" {
depends_on = [talos_machine_configuration_apply.talos_config_apply]
client_configuration = talos_machine_secrets.machine_secrets.client_configuration
- node = [for k, v in var.cluster_config.nodes : v.ip if v.machine_type == "controlplane" && !v.update][0]
+ node = [for k, v in var.cluster_config.nodes : v.ip if v.machine_type == "controlplane"][0]
}
data "talos_cluster_health" "health" {
- depends_on = [talos_machine_bootstrap.talos_bootstrap]
+ depends_on = [
+ talos_machine_configuration_apply.talos_config_apply,
+ talos_machine_bootstrap.talos_bootstrap
+ ]
client_configuration = data.talos_client_configuration.talos_config.client_configuration
control_plane_nodes = [for k, v in var.cluster_config.nodes : v.ip if v.machine_type == "controlplane"]
worker_nodes = [for k, v in var.cluster_config.nodes : v.ip if v.machine_type == "worker"]
@@ -62,10 +65,12 @@ data "talos_cluster_health" "health" {
}
data "talos_cluster_kubeconfig" "kubeconfig" {
- # depends_on = [talos_machine_bootstrap.talos_bootstrap]
- depends_on = [talos_machine_bootstrap.talos_bootstrap, data.talos_cluster_health.health]
+ depends_on = [
+ talos_machine_bootstrap.talos_bootstrap,
+ data.talos_cluster_health.health
+ ]
client_configuration = talos_machine_secrets.machine_secrets.client_configuration
- node = [for k, v in var.cluster_config.nodes : v.ip if v.machine_type == "controlplane" && !v.update][0]
+ node = [for k, v in var.cluster_config.nodes : v.ip if v.machine_type == "controlplane"][0]
timeouts = {
read = "1m"
}

View File

@@ -40,7 +40,7 @@ resource "proxmox_virtual_environment_vm" "talos_vm" {
ssd = true
file_format = "raw"
size = 20
- file_id = each.value.update ? proxmox_virtual_environment_download_file.updated_talos_image[each.value.host_node].id : proxmox_virtual_environment_download_file.talos_image[each.value.host_node].id
+ file_id = proxmox_virtual_environment_download_file.talos_image["${each.value.host_node}_${each.value.update == true ? local.update_image_id : local.image_id}"].id
}
boot_order = ["scsi0"]

View File

@@ -33,6 +33,12 @@ variable "cluster_config" {
})
)
})
+ validation {
+ condition = length([
+ for n in var.cluster_config.nodes : n if contains(["controlplane", "worker"], n.machine_type)]) == length(var.cluster_config.nodes)
+ error_message = "Node machine_type must be either 'controlplane' or 'worker'."
+ }
}
variable "volumes" {