From 71dbd19fdca294e77ee546923c9cccf327c6d29e Mon Sep 17 00:00:00 2001 From: Serge Logvinov Date: Wed, 14 Aug 2024 19:09:55 +0300 Subject: [PATCH] worker nodes --- hetzner/deployments/hcloud-ccm.yaml | 5 ++ ...cloud-cloud-controller-manager-result.yaml | 4 + .../hcloud-cloud-controller-manager.yaml | 89 ------------------- hetzner/instances-controlplane.tf | 3 +- hetzner/instances-web.tf | 7 +- hetzner/instances-worker-as.tf | 6 +- hetzner/instances-worker.tf | 29 ++---- hetzner/modules/templates/worker-as.yaml.tpl | 64 ------------- hetzner/modules/templates/worker.yaml.tpl | 65 -------------- hetzner/modules/worker/main.tf | 55 ------------ hetzner/modules/worker/outputs.tf | 4 - hetzner/modules/worker/variables.tf | 52 ----------- hetzner/modules/worker/versions.tf | 9 -- hetzner/network-lb.tf | 6 ++ hetzner/templates/worker.yaml.tpl | 26 ++++-- scaleway/templates/worker.yaml.tpl | 9 ++ 16 files changed, 61 insertions(+), 372 deletions(-) delete mode 100644 hetzner/deployments/hcloud-cloud-controller-manager.yaml delete mode 100644 hetzner/modules/templates/worker-as.yaml.tpl delete mode 100644 hetzner/modules/templates/worker.yaml.tpl delete mode 100644 hetzner/modules/worker/main.tf delete mode 100644 hetzner/modules/worker/outputs.tf delete mode 100644 hetzner/modules/worker/variables.tf delete mode 100644 hetzner/modules/worker/versions.tf diff --git a/hetzner/deployments/hcloud-ccm.yaml b/hetzner/deployments/hcloud-ccm.yaml index e69de29..b4a1470 100644 --- a/hetzner/deployments/hcloud-ccm.yaml +++ b/hetzner/deployments/hcloud-ccm.yaml @@ -0,0 +1,5 @@ +args: + controllers: cloud-node-lifecycle + +nodeSelector: + node-role.kubernetes.io/control-plane: "" diff --git a/hetzner/deployments/hcloud-cloud-controller-manager-result.yaml b/hetzner/deployments/hcloud-cloud-controller-manager-result.yaml index 67b2228..f2866a8 100644 --- a/hetzner/deployments/hcloud-cloud-controller-manager-result.yaml +++ b/hetzner/deployments/hcloud-cloud-controller-manager-result.yaml @@ -59,11 +59,15 @@ spec: - key: "node.kubernetes.io/not-ready" effect: "NoExecute" + nodeSelector: + + node-role.kubernetes.io/control-plane: "" containers: - name: hcloud-cloud-controller-manager args: - "--allow-untagged-cloud" - "--cloud-provider=hcloud" + - "--controllers=cloud-node-lifecycle" - "--route-reconciliation-period=30s" - "--webhook-secure-port=0" - "--leader-elect=false" diff --git a/hetzner/deployments/hcloud-cloud-controller-manager.yaml b/hetzner/deployments/hcloud-cloud-controller-manager.yaml deleted file mode 100644 index 7c0f1a9..0000000 --- a/hetzner/deployments/hcloud-cloud-controller-manager.yaml +++ /dev/null @@ -1,89 +0,0 @@ -# NOTE: this release was tested against kubernetes v1.18.x ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: cloud-controller-manager - namespace: kube-system ---- -kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: system:cloud-controller-manager -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: cluster-admin -subjects: - - kind: ServiceAccount - name: cloud-controller-manager - namespace: kube-system ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: hcloud-cloud-controller-manager - namespace: kube-system -spec: - replicas: 1 - revisionHistoryLimit: 2 - selector: - matchLabels: - app: hcloud-cloud-controller-manager - template: - metadata: - labels: - app: hcloud-cloud-controller-manager - spec: - priorityClassName: system-cluster-critical - serviceAccountName: cloud-controller-manager - dnsPolicy: Default - nodeSelector: - node-role.kubernetes.io/control-plane: "" - node.cloudprovider.kubernetes.io/platform: hcloud - tolerations: - - key: "node.cloudprovider.kubernetes.io/uninitialized" - value: "true" - effect: "NoSchedule" - - key: "node-role.kubernetes.io/control-plane" - effect: NoSchedule - containers: - - image: hetznercloud/hcloud-cloud-controller-manager:v1.17.2 - name: hcloud-cloud-controller-manager - args: - - --cloud-provider=hcloud - - --allow-untagged-cloud - - --controllers=cloud-node-lifecycle - resources: - requests: - cpu: 100m - memory: 50Mi - env: - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: HCLOUD_TOKEN - valueFrom: - secretKeyRef: - name: hcloud - key: token - - name: HCLOUD_NETWORK - valueFrom: - secretKeyRef: - name: hcloud - key: network - - name: HCLOUD_INSTANCES_ADDRESS_FAMILY - value: dualstack - - name: ROBOT_USER_NAME - valueFrom: - secretKeyRef: - optional: true - name: hcloud - key: user - - name: ROBOT_PASSWORD - valueFrom: - secretKeyRef: - optional: true - name: hcloud - key: password diff --git a/hetzner/instances-controlplane.tf b/hetzner/instances-controlplane.tf index a450ff1..535208b 100644 --- a/hetzner/instances-controlplane.tf +++ b/hetzner/instances-controlplane.tf @@ -1,11 +1,12 @@ locals { + contolplane_prefix = "controlplane" contolplane_labels = "" controlplanes = { for k in flatten([ for regions in var.regions : [ for inx in range(lookup(try(var.controlplane[regions], {}), "count", 0)) : { - name : "controlplane-${regions}-${1 + inx}" + name : "${local.contolplane_prefix}-${regions}-${1 + inx}" image : data.hcloud_image.talos[startswith(lookup(try(var.controlplane[regions], {}), "type", "cpx11"), "ca") ? "arm64" : "amd64"].id region : regions type : lookup(try(var.controlplane[regions], {}), "type", "cpx11") diff --git a/hetzner/instances-web.tf b/hetzner/instances-web.tf index 8ab6e13..6e7dbd8 100644 --- a/hetzner/instances-web.tf +++ b/hetzner/instances-web.tf @@ -27,7 +27,7 @@ resource "hcloud_server" "web" { labels = merge(var.tags, { label = "web" }) user_data = templatefile("${path.module}/templates/worker.yaml.tpl", - merge(var.kubernetes, { + merge(local.kubernetes, try(var.instances["all"], {}), { name = each.value.name ipv4 = each.value.ip lbv4 = local.ipv4_vip @@ -41,6 +41,10 @@ resource "hcloud_server" "web" { network_id = hcloud_network.main.id ip = each.value.ip } + public_net { + ipv4_enabled = true + ipv6_enabled = true + } lifecycle { ignore_changes = [ @@ -48,6 +52,7 @@ resource "hcloud_server" "web" { server_type, user_data, ssh_keys, + public_net, ] } } diff --git a/hetzner/instances-worker-as.tf b/hetzner/instances-worker-as.tf index cfabd6b..7a72a44 100644 --- a/hetzner/instances-worker-as.tf +++ b/hetzner/instances-worker-as.tf @@ -1,10 +1,10 @@ resource "local_sensitive_file" "worker-as" { - content = templatefile("${path.module}/templates/worker-as.yaml.tpl", - merge(var.kubernetes, { + content = templatefile("${path.module}/templates/worker.yaml.tpl", + merge(local.kubernetes, try(var.instances["all"], {}), { lbv4 = local.ipv4_vip nodeSubnets = var.vpc_main_cidr - labels = "project.io/node-pool=worker,hcloud/node-group=worker-as" + labels = "${local.worker_labels},hcloud/node-group=worker-as" }) ) diff --git a/hetzner/instances-worker.tf b/hetzner/instances-worker.tf index ed58dd7..f7e1d53 100644 --- a/hetzner/instances-worker.tf +++ b/hetzner/instances-worker.tf @@ -27,7 +27,7 @@ resource "hcloud_server" "worker" { labels = merge(var.tags, { label = "worker" }) user_data = templatefile("${path.module}/templates/worker.yaml.tpl", - merge(var.kubernetes, { + merge(local.kubernetes, try(var.instances["all"], {}), { name = each.value.name ipv4 = each.value.ip lbv4 = local.ipv4_vip @@ -41,6 +41,10 @@ resource "hcloud_server" "worker" { network_id = hcloud_network.main.id ip = each.value.ip } + public_net { + ipv4_enabled = true + ipv6_enabled = true + } lifecycle { ignore_changes = [ @@ -48,28 +52,7 @@ resource "hcloud_server" "worker" { server_type, user_data, ssh_keys, + public_net, ] } } - -# module "worker" { -# source = "./modules/worker" - -# for_each = var.instances -# location = each.key -# labels = merge(var.tags, { label = "worker" }) -# network = hcloud_network.main.id -# subnet = hcloud_network_subnet.core.ip_range - -# vm_name = "worker-${each.key}-" -# vm_items = lookup(each.value, "worker_count", 0) -# vm_type = lookup(each.value, "worker_type", "cx11") -# vm_image = data.hcloud_image.talos.id -# vm_ip_start = (6 + try(index(var.regions, each.key), 0)) * 10 -# vm_security_group = [hcloud_firewall.worker.id] - -# vm_params = merge(var.kubernetes, { -# lbv4 = local.ipv4_vip -# labels = "project.io/node-pool=worker,hcloud/node-group=worker-${each.key}" -# }) -# } diff --git a/hetzner/modules/templates/worker-as.yaml.tpl b/hetzner/modules/templates/worker-as.yaml.tpl deleted file mode 100644 index d4aadc7..0000000 --- a/hetzner/modules/templates/worker-as.yaml.tpl +++ /dev/null @@ -1,64 +0,0 @@ -version: v1alpha1 -debug: false -persist: true -machine: - type: worker - token: ${tokenMachine} - ca: - crt: ${caMachine} - certSANs: [] - nodeLabels: - node.kubernetes.io/disktype: ssd - kubelet: - extraArgs: - cloud-provider: external - rotate-server-certificates: true - node-labels: "${labels}" - clusterDNS: - - 169.254.2.53 - - ${cidrhost(split(",",serviceSubnets)[0], 10)} - nodeIP: - validSubnets: ${format("%#v",split(",",nodeSubnets))} - network: - interfaces: - - interface: dummy0 - addresses: - - 169.254.2.53/32 - extraHostEntries: - - ip: ${lbv4} - aliases: - - ${apiDomain} - sysctls: - net.core.somaxconn: 65535 - net.core.netdev_max_backlog: 4096 - install: - wipe: false - systemDiskEncryption: - state: - provider: luks2 - keys: - - nodeID: {} - slot: 0 - ephemeral: - provider: luks2 - keys: - - nodeID: {} - slot: 0 - options: - - no_read_workqueue - - no_write_workqueue -cluster: - id: ${clusterID} - secret: ${clusterSecret} - controlPlane: - endpoint: https://${apiDomain}:6443 - clusterName: ${clusterName} - discovery: - enabled: true - network: - dnsDomain: ${domain} - podSubnets: ${format("%#v",split(",",podSubnets))} - serviceSubnets: ${format("%#v",split(",",serviceSubnets))} - token: ${token} - ca: - crt: ${ca} diff --git a/hetzner/modules/templates/worker.yaml.tpl b/hetzner/modules/templates/worker.yaml.tpl deleted file mode 100644 index aa9c5f6..0000000 --- a/hetzner/modules/templates/worker.yaml.tpl +++ /dev/null @@ -1,65 +0,0 @@ -version: v1alpha1 -debug: false -persist: true -machine: - type: worker - token: ${tokenMachine} - ca: - crt: ${caMachine} - certSANs: [] - nodeLabels: - node.kubernetes.io/disktype: ssd - kubelet: - extraArgs: - node-ip: "${ipv4}" - cloud-provider: external - rotate-server-certificates: true - node-labels: "${labels}" - clusterDNS: - - 169.254.2.53 - - ${cidrhost(split(",",serviceSubnets)[0], 10)} - nodeIP: - validSubnets: ${format("%#v",split(",",nodeSubnets))} - network: - hostname: "${name}" - interfaces: - - interface: dummy0 - addresses: - - 169.254.2.53/32 - extraHostEntries: - - ip: ${lbv4} - aliases: - - ${apiDomain} - install: - wipe: false - sysctls: - net.core.somaxconn: 65535 - net.core.netdev_max_backlog: 4096 - systemDiskEncryption: - state: - provider: luks2 - keys: - - nodeID: {} - slot: 0 - ephemeral: - provider: luks2 - keys: - - nodeID: {} - slot: 0 - options: - - no_read_workqueue - - no_write_workqueue -cluster: - id: ${clusterID} - secret: ${clusterSecret} - controlPlane: - endpoint: https://${apiDomain}:6443 - clusterName: ${clusterName} - discovery: - enabled: true - network: - dnsDomain: ${domain} - serviceSubnets: ${format("%#v",split(",",serviceSubnets))} - token: ${token} - ca: - crt: ${ca} diff --git a/hetzner/modules/worker/main.tf b/hetzner/modules/worker/main.tf deleted file mode 100644 index d70f773..0000000 --- a/hetzner/modules/worker/main.tf +++ /dev/null @@ -1,55 +0,0 @@ - -resource "hcloud_server" "worker" { - count = var.vm_items - location = var.location - name = "${var.vm_name}${count.index + 1}" - image = var.vm_image - server_type = var.vm_type - ssh_keys = [] - keep_disk = true - labels = var.labels - - user_data = templatefile("${path.module}/../templates/worker.yaml.tpl", - merge(var.vm_params, { - name = "${var.vm_name}${count.index + 1}" - ipv4 = cidrhost(var.subnet, var.vm_ip_start + count.index) - }) - ) - - firewall_ids = var.vm_security_group - network { - network_id = var.network - ip = cidrhost(var.subnet, var.vm_ip_start + count.index) - } - - lifecycle { - ignore_changes = [ - image, - server_type, - user_data, - ssh_keys, - ] - } - - # IPv6 hack - # provisioner "local-exec" { - # command = "echo '${templatefile("${path.module}/../templates/worker-patch.json.tpl", { ipv6_address = self.ipv6_address })}' > _cfgs/${var.vm_name}${count.index + 1}.patch" - # } - # provisioner "local-exec" { - # command = "sleep 120 && talosctl --talosconfig _cfgs/talosconfig patch --nodes ${cidrhost(var.subnet, var.vm_ip_start + count.index)} machineconfig --patch-file _cfgs/${var.vm_name}${count.index + 1}.patch" - # } -} - -# resource "local_file" "worker" { -# count = var.vm_items -# content = templatefile("${path.module}/../templates/worker.yaml.tpl", -# merge(var.vm_params, { -# name = "${var.vm_name}${count.index + 1}" -# ipv4 = cidrhost(var.subnet, var.vm_ip_start + count.index) -# }) -# ) -# filename = "${var.vm_name}${count.index + 1}.yaml" -# file_permission = "0640" - -# depends_on = [hcloud_server.worker] -# } diff --git a/hetzner/modules/worker/outputs.tf b/hetzner/modules/worker/outputs.tf deleted file mode 100644 index 566945d..0000000 --- a/hetzner/modules/worker/outputs.tf +++ /dev/null @@ -1,4 +0,0 @@ - -output "vms" { - value = hcloud_server.worker -} diff --git a/hetzner/modules/worker/variables.tf b/hetzner/modules/worker/variables.tf deleted file mode 100644 index de4f2a6..0000000 --- a/hetzner/modules/worker/variables.tf +++ /dev/null @@ -1,52 +0,0 @@ - -variable "location" { - type = string - default = "nbg1" -} - -variable "labels" { - type = map(string) - description = "Tags of resources" -} - -variable "network" { - type = string - description = "Network id" -} - -variable "subnet" { - type = string - description = "Subnet cidr" -} - -variable "vm_name" { - type = string - default = "worker-" -} - -variable "vm_items" { - type = number - default = 0 -} - -variable "vm_type" { - type = string - default = "cx11" -} - -variable "vm_image" { - type = string -} - -variable "vm_security_group" { - type = list(string) -} - -variable "vm_ip_start" { - type = number - default = 61 -} - -variable "vm_params" { - type = map(string) -} diff --git a/hetzner/modules/worker/versions.tf b/hetzner/modules/worker/versions.tf deleted file mode 100644 index 923e476..0000000 --- a/hetzner/modules/worker/versions.tf +++ /dev/null @@ -1,9 +0,0 @@ -terraform { - required_providers { - hcloud = { - source = "hetznercloud/hcloud" - version = "~> 1.36.1" - } - } - required_version = ">= 1.2" -} diff --git a/hetzner/network-lb.tf b/hetzner/network-lb.tf index d111b90..e20f213 100644 --- a/hetzner/network-lb.tf +++ b/hetzner/network-lb.tf @@ -18,6 +18,12 @@ resource "hcloud_floating_ip" "api" { labels = merge(var.tags, { type = "infra" }) } +# resource "hcloud_floating_ip_assignment" "api" { +# count = local.lb_enable ? 0 : 1 +# floating_ip_id = hcloud_floating_ip.api[0].id +# server_id = one(hcloud_server.controlplane).id +# } + resource "hcloud_load_balancer" "api" { count = local.lb_enable ? 1 : 0 name = "api" diff --git a/hetzner/templates/worker.yaml.tpl b/hetzner/templates/worker.yaml.tpl index 18f0945..6c444f0 100644 --- a/hetzner/templates/worker.yaml.tpl +++ b/hetzner/templates/worker.yaml.tpl @@ -6,30 +6,40 @@ machine: token: ${tokenMachine} ca: crt: ${caMachine} - nodeLabels: - node.kubernetes.io/disktype: ssd kubelet: + image: ghcr.io/siderolabs/kubelet:${version} + defaultRuntimeSeccompProfileEnabled: true extraArgs: cloud-provider: external rotate-server-certificates: true - node-labels: "${labels}" + node-labels: ${labels} + extraConfig: + imageGCHighThresholdPercent: 70 + imageGCLowThresholdPercent: 50 + allowedUnsafeSysctls: [net.core.somaxconn] clusterDNS: - 169.254.2.53 - ${cidrhost(split(",",serviceSubnets)[0], 10)} nodeIP: validSubnets: ${format("%#v",split(",",nodeSubnets))} network: - hostname: "${name}" interfaces: - interface: dummy0 addresses: - 169.254.2.53/32 + kubespan: + enabled: false + allowDownPeerBypass: true extraHostEntries: - ip: ${lbv4} aliases: - ${apiDomain} + nameservers: + - 1.1.1.1 + - 2606:4700:4700::1111 + - 2001:4860:4860::8888 install: - wipe: false + wipe: true sysctls: net.core.somaxconn: 65535 net.core.netdev_max_backlog: 4096 @@ -47,6 +57,10 @@ machine: options: - no_read_workqueue - no_write_workqueue + features: + rbac: true + stableHostname: true + apidCheckExtKeyUsage: true cluster: id: ${clusterID} secret: ${clusterSecret} @@ -54,7 +68,7 @@ cluster: endpoint: https://${apiDomain}:6443 clusterName: ${clusterName} discovery: - enabled: true + enabled: false network: dnsDomain: ${domain} serviceSubnets: ${format("%#v",split(",",serviceSubnets))} diff --git a/scaleway/templates/worker.yaml.tpl b/scaleway/templates/worker.yaml.tpl index 76b80dc..197629b 100644 --- a/scaleway/templates/worker.yaml.tpl +++ b/scaleway/templates/worker.yaml.tpl @@ -8,10 +8,15 @@ machine: crt: ${caMachine} kubelet: image: ghcr.io/siderolabs/kubelet:${version} + defaultRuntimeSeccompProfileEnabled: true extraArgs: cloud-provider: external rotate-server-certificates: true node-labels: ${labels} + extraConfig: + imageGCHighThresholdPercent: 70 + imageGCLowThresholdPercent: 50 + allowedUnsafeSysctls: [net.core.somaxconn] clusterDNS: - 169.254.2.53 - ${cidrhost(split(",",serviceSubnets)[0], 10)} @@ -48,6 +53,10 @@ machine: options: - no_read_workqueue - no_write_workqueue + features: + rbac: true + stableHostname: true + apidCheckExtKeyUsage: true cluster: id: ${clusterID} secret: ${clusterSecret}