From 98916a2ae48a501c75b6d30cbdcddfd74a8da8e3 Mon Sep 17 00:00:00 2001 From: Serge Logvinov Date: Sat, 15 Jun 2024 12:55:04 +0300 Subject: [PATCH] update proxmox setup --- .../vars/local-path-storage-result.yaml | 83 +- _deployments/vars/local-path-storage.yaml | 12 +- oracle/deployments/oci-csi-controller.yaml | 6 - oracle/deployments/oci-csi-node.yaml | 25 - oracle/deployments/oci-storage.yaml | 13 + proxmox/.gitignore | 12 +- proxmox/.sops.yaml | 21 + proxmox/Makefile | 56 +- proxmox/auth.tf | 38 +- proxmox/common.tf | 86 +- proxmox/deployments/cilium-result.yaml | 908 ------------------ proxmox/deployments/cilium.yaml | 77 -- proxmox/deployments/pod-ephemeral.yaml | 2 +- proxmox/deployments/proxmox-csi.yaml | 41 - proxmox/deployments/talos-ccm.yaml | 38 + proxmox/deployments/test-statefulset.yaml | 12 +- proxmox/deployments/ubuntu.yaml | 49 + proxmox/images/.gitignore | 2 - proxmox/images/Makefile | 18 - proxmox/images/README.md | 54 -- proxmox/images/proxmox.pkr.hcl | 80 -- proxmox/images/variables.pkr.hcl | 33 - proxmox/init/outputs.tf | 10 + proxmox/init/users.tf | 44 +- proxmox/init/variables.tf | 5 - proxmox/init/versions.tf | 2 +- proxmox/instances-controlplane.tf | 257 +++-- proxmox/instances-db.tf | 230 +++++ proxmox/instances-web.tf | 292 ++++-- proxmox/instances-worker.tf | 338 +++---- proxmox/network-lb.tf | 5 - proxmox/network.tf | 7 +- proxmox/outputs.tf | 30 +- proxmox/templates/controlplane.yaml.tpl | 42 +- proxmox/templates/web.yaml.tpl | 20 +- proxmox/templates/worker.patch.yaml.tpl | 45 - proxmox/templates/worker.yaml.tpl | 30 +- proxmox/variables.tf | 197 ++-- proxmox/vars/proxmox-ccm.yaml | 15 + proxmox/vars/proxmox-csi.yaml | 23 + proxmox/vars/proxmox-ns.yaml | 8 + proxmox/vars/secrets.proxmox.yaml | 7 + proxmox/versions.tf | 12 +- 43 files changed, 1425 insertions(+), 1860 deletions(-) create mode 100644 proxmox/.sops.yaml delete mode 100644 proxmox/deployments/cilium-result.yaml delete mode 100644 proxmox/deployments/cilium.yaml delete mode 100644 proxmox/deployments/proxmox-csi.yaml create mode 100644 proxmox/deployments/talos-ccm.yaml create mode 100644 proxmox/deployments/ubuntu.yaml delete mode 100644 proxmox/images/.gitignore delete mode 100644 proxmox/images/Makefile delete mode 100644 proxmox/images/README.md delete mode 100644 proxmox/images/proxmox.pkr.hcl delete mode 100644 proxmox/images/variables.pkr.hcl create mode 100644 proxmox/init/outputs.tf create mode 100644 proxmox/instances-db.tf delete mode 100644 proxmox/network-lb.tf delete mode 100644 proxmox/templates/worker.patch.yaml.tpl create mode 100644 proxmox/vars/proxmox-ccm.yaml create mode 100644 proxmox/vars/proxmox-csi.yaml create mode 100644 proxmox/vars/proxmox-ns.yaml create mode 100644 proxmox/vars/secrets.proxmox.yaml diff --git a/_deployments/vars/local-path-storage-result.yaml b/_deployments/vars/local-path-storage-result.yaml index 3a82411..c711907 100644 --- a/_deployments/vars/local-path-storage-result.yaml +++ b/_deployments/vars/local-path-storage-result.yaml @@ -74,6 +74,7 @@ metadata: app.kubernetes.io/managed-by: Helm annotations: storageclass.kubernetes.io/is-default-class: "true" + defaultVolumeType: "hostPath" provisioner: rancher.io/local-path volumeBindingMode: WaitForFirstConsumer reclaimPolicy: Delete @@ -91,18 +92,18 @@ metadata: app.kubernetes.io/version: "v0.0.25-dev" app.kubernetes.io/managed-by: Helm rules: -- apiGroups: [""] - resources: ["nodes", "persistentvolumeclaims", "configmaps"] - verbs: ["get", "list", "watch"] -- apiGroups: [""] - resources: ["endpoints", 
"persistentvolumes", "pods"] - verbs: ["*"] -- apiGroups: [""] - resources: ["events"] - verbs: ["create", "patch"] -- apiGroups: ["storage.k8s.io"] - resources: ["storageclasses"] - verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["nodes", "persistentvolumeclaims", "configmaps", "pods", "pods/log"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: ["get", "list", "watch", "create", "patch", "update", "delete"] + - apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses"] + verbs: ["get", "list", "watch"] --- # Source: local-path-provisioner/templates/clusterrolebinding.yaml apiVersion: rbac.authorization.k8s.io/v1 @@ -124,6 +125,44 @@ subjects: name: local-path-provisioner namespace: local-path-storage --- +# Source: local-path-provisioner/templates/role.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: local-path-provisioner + namespace: local-path-storage + labels: + app.kubernetes.io/name: local-path-provisioner + helm.sh/chart: local-path-provisioner-0.0.25-dev + app.kubernetes.io/instance: local-path-provisioner + app.kubernetes.io/version: "v0.0.25-dev" + app.kubernetes.io/managed-by: Helm +rules: + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch", "create", "patch", "update", "delete"] +--- +# Source: local-path-provisioner/templates/rolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: local-path-provisioner + namespace: local-path-storage + labels: + app.kubernetes.io/name: local-path-provisioner + helm.sh/chart: local-path-provisioner-0.0.25-dev + app.kubernetes.io/instance: local-path-provisioner + app.kubernetes.io/version: "v0.0.25-dev" + app.kubernetes.io/managed-by: Helm +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: local-path-provisioner +subjects: + - kind: ServiceAccount + name: local-path-provisioner + namespace: local-path-storage +--- # Source: local-path-provisioner/templates/deployment.yaml apiVersion: apps/v1 kind: Deployment @@ -141,17 +180,35 @@ spec: selector: matchLabels: app.kubernetes.io/name: local-path-provisioner + helm.sh/chart: local-path-provisioner-0.0.25-dev app.kubernetes.io/instance: local-path-provisioner + app.kubernetes.io/version: "v0.0.25-dev" + app.kubernetes.io/managed-by: Helm template: metadata: labels: app.kubernetes.io/name: local-path-provisioner + helm.sh/chart: local-path-provisioner-0.0.25-dev app.kubernetes.io/instance: local-path-provisioner + app.kubernetes.io/version: "v0.0.25-dev" + app.kubernetes.io/managed-by: Helm spec: serviceAccountName: local-path-provisioner + securityContext: + {} containers: - name: local-path-provisioner - image: "rancher/local-path-provisioner:v0.0.24" + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 65534 + runAsUser: 65534 + seccompProfile: + type: RuntimeDefault + image: "rancher/local-path-provisioner:v0.0.26" imagePullPolicy: IfNotPresent command: - local-path-provisioner diff --git a/_deployments/vars/local-path-storage.yaml b/_deployments/vars/local-path-storage.yaml index 686bdcb..fd5ecc6 100644 --- a/_deployments/vars/local-path-storage.yaml +++ b/_deployments/vars/local-path-storage.yaml @@ -1,7 +1,7 @@ image: repository: rancher/local-path-provisioner - tag: v0.0.24 + tag: v0.0.26 storageClass: create: true @@ -16,6 +16,16 @@ nodePathMap: configmap: name: 
local-path-config +securityContext: + allowPrivilegeEscalation: false + seccompProfile: + type: RuntimeDefault + capabilities: + drop: ["ALL"] + runAsUser: 65534 + runAsGroup: 65534 + readOnlyRootFilesystem: true + resources: limits: cpu: 50m diff --git a/oracle/deployments/oci-csi-controller.yaml b/oracle/deployments/oci-csi-controller.yaml index d14218d..3382d5c 100644 --- a/oracle/deployments/oci-csi-controller.yaml +++ b/oracle/deployments/oci-csi-controller.yaml @@ -131,18 +131,12 @@ spec: - name: config mountPath: /etc/oci/ readOnly: true - - name: kubernetes - mountPath: /etc/kubernetes - readOnly: true - mountPath: /var/run/shared-tmpfs name: shared-tmpfs volumes: - name: config secret: secretName: oci-volume-provisioner - - name: kubernetes - hostPath: - path: /etc/kubernetes - name: shared-tmpfs emptyDir: {} dnsPolicy: ClusterFirst diff --git a/oracle/deployments/oci-csi-node.yaml b/oracle/deployments/oci-csi-node.yaml index 90f7248..a1dd5c1 100644 --- a/oracle/deployments/oci-csi-node.yaml +++ b/oracle/deployments/oci-csi-node.yaml @@ -114,8 +114,6 @@ spec: volumeMounts: - mountPath: /csi name: plugin-dir - - mountPath: /fss - name: fss-plugin-dir - mountPath: /var/lib/kubelet mountPropagation: Bidirectional name: pods-mount-dir @@ -157,25 +155,6 @@ spec: name: plugin-dir - mountPath: /registration name: registration-dir - - name: csi-node-registrar-fss - args: - - --csi-address=/fss/csi.sock - - --kubelet-registration-path=/var/lib/kubelet/plugins/fss.csi.oraclecloud.com/csi.sock - image: k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.5.0 - securityContext: - privileged: true - lifecycle: - preStop: - exec: - command: - - /bin/sh - - -c - - rm -rf /registration/fss.csi.oraclecloud.com /registration/fss.csi.oraclecloud.com-reg.sock - volumeMounts: - - mountPath: /fss - name: fss-plugin-dir - - mountPath: /registration - name: registration-dir dnsPolicy: ClusterFirst hostNetwork: true restartPolicy: Always @@ -192,10 +171,6 @@ spec: path: /var/lib/kubelet/plugins/blockvolume.csi.oraclecloud.com type: DirectoryOrCreate name: plugin-dir - - hostPath: - path: /var/lib/kubelet/plugins/fss.csi.oraclecloud.com - type: DirectoryOrCreate - name: fss-plugin-dir - hostPath: path: /var/lib/kubelet type: Directory diff --git a/oracle/deployments/oci-storage.yaml b/oracle/deployments/oci-storage.yaml index e79476d..7c30fa9 100644 --- a/oracle/deployments/oci-storage.yaml +++ b/oracle/deployments/oci-storage.yaml @@ -19,6 +19,19 @@ reclaimPolicy: Delete --- apiVersion: storage.k8s.io/v1 kind: StorageClass +metadata: + name: oci-bv-high +provisioner: blockvolume.csi.oraclecloud.com +parameters: + csi.storage.k8s.io/fstype: xfs + attachment-type: "paravirtualized" + vpusPerGB: "20" +volumeBindingMode: WaitForFirstConsumer +allowVolumeExpansion: true +reclaimPolicy: Delete +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass metadata: name: oci-bv-encrypted provisioner: blockvolume.csi.oraclecloud.com diff --git a/proxmox/.gitignore b/proxmox/.gitignore index 648bbea..daec180 100644 --- a/proxmox/.gitignore +++ b/proxmox/.gitignore @@ -1,3 +1,11 @@ _cfgs/ -templates/worker.patch.yaml -config.yaml +.terraform.lock.hcl +.terraform.tfstate.lock.info +/terraform.tfstate +terraform.tfstate.backup +terraform.tfvars +terraform.tfvars.json +terraform.tfvars.sops.json +# +age.key.txt +.env.yaml diff --git a/proxmox/.sops.yaml b/proxmox/.sops.yaml new file mode 100644 index 0000000..dfc10bd --- /dev/null +++ b/proxmox/.sops.yaml @@ -0,0 +1,21 @@ +--- +creation_rules: + - path_regex: \.env\.yaml$ + 
key_groups: + - age: + - age1ngvggfld4elq68926uczkes9rcqfjhnqn0tr6l8avyp4h46qzucqvx3sdf + - path_regex: terraform\.tfvars\.sops\.json$ + encrypted_regex: "(token|Secret|ID)" + key_groups: + - age: + - age1ngvggfld4elq68926uczkes9rcqfjhnqn0tr6l8avyp4h46qzucqvx3sdf + - path_regex: _cfgs/controlplane.yaml$ + encrypted_regex: "(token|key|secret|id)" + key_groups: + - age: + - age1ngvggfld4elq68926uczkes9rcqfjhnqn0tr6l8avyp4h46qzucqvx3sdf + - path_regex: _cfgs/talosconfig$ + encrypted_regex: "key" + key_groups: + - age: + - age1ngvggfld4elq68926uczkes9rcqfjhnqn0tr6l8avyp4h46qzucqvx3sdf diff --git a/proxmox/Makefile b/proxmox/Makefile index 24278fc..bb1d1ae 100644 --- a/proxmox/Makefile +++ b/proxmox/Makefile @@ -6,6 +6,8 @@ ifneq (,$(findstring Warning,${ENDPOINT})) ENDPOINT := api.cluster.local endif +SOPS_AGE_KEY_FILE ?= age.key.txt + help: @awk 'BEGIN {FS = ":.*?## "} /^[0-9a-zA-Z_-]+:.*?## / {sub("\\\\n",sprintf("\n%22c"," "), $$2);printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) @@ -13,9 +15,12 @@ help: init: ## Initialize terraform terraform init -upgrade +create-age: ## Create age key + age-keygen -o age.key.txt + create-config: ## Genereate talos configs terraform apply -auto-approve -target=local_file.worker_patch - talosctl gen config --output-dir _cfgs --with-docs=false --with-examples=false --config-patch-worker @templates/worker.patch.yaml ${CLUSTERNAME} https://${ENDPOINT}:6443 + talosctl gen config --output-dir _cfgs --with-docs=false --with-examples=false ${CLUSTERNAME} https://${ENDPOINT}:6443 talosctl --talosconfig _cfgs/talosconfig config endpoint ${ENDPOINT} create-templates: @@ -31,16 +36,51 @@ create-templates: @yq eval '.cluster.token' _cfgs/controlplane.yaml | awk '{ print "token: "$$1}' >> _cfgs/tfstate.vars @yq eval '.cluster.ca.crt' _cfgs/controlplane.yaml | awk '{ print "ca: "$$1}' >> _cfgs/tfstate.vars - @yq eval -o=json '{"kubernetes": .}' _cfgs/tfstate.vars > terraform.tfvars.json + @yq eval -o=json '{"kubernetes": .}' _cfgs/tfstate.vars > terraform.tfvars.sops.json + @sops --encrypt -i terraform.tfvars.sops.json + @yq eval .ca _cfgs/tfstate.vars | base64 --decode > _cfgs/ca.crt + @sops --encrypt --input-type=yaml --output-type=yaml _cfgs/talosconfig > _cfgs/talosconfig.sops.yaml + @git add -f _cfgs/talosconfig.sops.yaml _cfgs/ca.crt terraform.tfvars.sops.json -create-controlplane-bootstrap: +create-controlplane-bootstrap: ## Bootstrap controlplane talosctl --talosconfig _cfgs/talosconfig config endpoint ${ENDPOINT} talosctl --talosconfig _cfgs/talosconfig --nodes ${CPFIRST} bootstrap -create-controlplane: ## Bootstrap first controlplane node - terraform apply -auto-approve -target=null_resource.controlplane - -create-kubeconfig: ## Prepare kubeconfig +kubeconfig: ## Prepare kubeconfig + rm -f kubeconfig + talosctl --talosconfig _cfgs/talosconfig config endpoint ${ENDPOINT} talosctl --talosconfig _cfgs/talosconfig --nodes ${CPFIRST} kubeconfig . 
- kubectl --kubeconfig=kubeconfig config set clusters.${CLUSTERNAME}.server https://${ENDPOINT}:6443 + kubectl --kubeconfig=kubeconfig config set clusters.${CLUSTERNAME}.server https://[${ENDPOINT}]:6443 kubectl --kubeconfig=kubeconfig config set-context --current --namespace=kube-system + +########################################################################################## + +nodes: ## Show kubernetes nodes + @kubectl get nodes -owide --sort-by '{.metadata.name}' --label-columns topology.kubernetes.io/region,topology.kubernetes.io/zone,node.kubernetes.io/instance-type + +system: + helm --kubeconfig=kubeconfig upgrade -i --namespace=kube-system --version=1.15.6 -f ../_deployments/vars/cilium.yaml \ + cilium cilium/cilium + + kubectl --kubeconfig=kubeconfig -n kube-system delete svc cilium-agent + + kubectl --kubeconfig=kubeconfig apply -f ../_deployments/vars/coredns-local.yaml + + helm --kubeconfig=kubeconfig upgrade -i --namespace=kube-system -f ../_deployments/vars/metrics-server.yaml \ + metrics-server metrics-server/metrics-server + + helm --kubeconfig=kubeconfig upgrade -i --namespace=kube-system -f deployments/talos-ccm.yaml \ + --set-string image.tag=edge \ + --set useDaemonSet=true \ + talos-cloud-controller-manager \ + oci://ghcr.io/siderolabs/charts/talos-cloud-controller-manager + + helm --kubeconfig=kubeconfig upgrade -i --namespace=kube-system -f vars/proxmox-ccm.yaml \ + proxmox-cloud-controller-manager oci://ghcr.io/sergelogvinov/charts/proxmox-cloud-controller-manager + + # + # File vars/secrets.proxmox.yaml should be created manually + # + kubectl --kubeconfig=kubeconfig apply -f vars/proxmox-ns.yaml + helm --kubeconfig=kubeconfig secrets upgrade -i --namespace=csi-proxmox -f vars/proxmox-csi.yaml -f vars/secrets.proxmox.yaml \ + proxmox-csi-plugin oci://ghcr.io/sergelogvinov/charts/proxmox-csi-plugin diff --git a/proxmox/auth.tf b/proxmox/auth.tf index 55c10da..a731b01 100644 --- a/proxmox/auth.tf +++ b/proxmox/auth.tf @@ -1,18 +1,26 @@ -# provider "proxmox" { -# virtual_environment { -# endpoint = "https://${var.proxmox_host}:8006/" -# insecure = true - -# username = var.proxmox_token_id -# password = var.proxmox_token_secret -# } -# } - provider "proxmox" { - pm_api_url = "https://${var.proxmox_host}:8006/api2/json" - pm_api_token_id = var.proxmox_token_id - pm_api_token_secret = var.proxmox_token_secret - pm_tls_insecure = true - pm_debug = true + endpoint = "https://${var.proxmox_host}:8006/" + insecure = true + + # api_token = data.sops_file.envs.data["PROXMOX_VE_API_TOKEN"] + username = "root@pam" + password = data.sops_file.envs.data["PROXMOX_VE_PASSWORD"] + + ssh { + username = "root" + agent = true + + dynamic "node" { + for_each = var.nodes + content { + name = node.key + address = node.value.ip4 + } + } + } +} + +data "sops_file" "envs" { + source_file = ".env.yaml" } diff --git a/proxmox/common.tf b/proxmox/common.tf index f02caf2..de49ab4 100644 --- a/proxmox/common.tf +++ b/proxmox/common.tf @@ -1,13 +1,79 @@ -resource "local_file" "worker_patch" { - content = templatefile("${path.module}/templates/worker.patch.yaml.tpl", - merge(var.kubernetes, { - lbv4 = local.ipv4_vip - nodeSubnets = var.vpc_main_cidr - labels = "project.io/node-pool=worker" - }) - ) +locals { + cpu_numa = { + for k, v in var.nodes : k => [for i in lookup(v, "cpu", "") : + flatten([for r in split(",", i) : (strcontains(r, "-") ? 
range(split("-", r)[0], split("-", r)[1] + 1, 1) : [r])]) + ] + } - filename = "${path.module}/templates/worker.patch.yaml" - file_permission = "0600" + cpus = { for k, v in local.cpu_numa : k => + flatten([for numa in v : flatten([for r in range(length(numa) / 2) : [numa[r], numa[r + length(numa) / 2]]])]) + } +} + +data "proxmox_virtual_environment_node" "node" { + for_each = { for inx, zone in local.zones : zone => inx if lookup(try(var.instances[zone], {}), "enabled", false) } + node_name = each.key +} + +resource "proxmox_virtual_environment_download_file" "talos" { + for_each = { for inx, zone in local.zones : zone => inx if lookup(try(var.instances[zone], {}), "enabled", false) } + node_name = each.key + content_type = "iso" + datastore_id = "local" + file_name = "talos.raw.xz.img" + overwrite = false + + decompression_algorithm = "zst" + url = "https://github.com/siderolabs/talos/releases/download/v${var.release}/nocloud-amd64.raw.xz" +} + +resource "proxmox_virtual_environment_vm" "template" { + for_each = { for inx, zone in local.zones : zone => inx if lookup(try(var.instances[zone], {}), "enabled", false) } + name = "talos" + node_name = each.key + vm_id = each.value + 1000 + on_boot = false + template = true + description = "Talos ${var.release} template" + + tablet_device = false + + machine = "pc" + cpu { + architecture = "x86_64" + cores = 1 + sockets = 1 + numa = true + type = "host" + } + + scsi_hardware = "virtio-scsi-single" + disk { + file_id = proxmox_virtual_environment_download_file.talos[each.key].id + datastore_id = "local" + interface = "scsi0" + ssd = true + iothread = true + cache = "none" + size = 2 + file_format = "raw" + } + + operating_system { + type = "l26" + } + + serial_device {} + vga { + type = "serial0" + } + + lifecycle { + ignore_changes = [ + ipv4_addresses, + ipv6_addresses, + network_interface_names, + ] + } } diff --git a/proxmox/deployments/cilium-result.yaml b/proxmox/deployments/cilium-result.yaml deleted file mode 100644 index 469a4d5..0000000 --- a/proxmox/deployments/cilium-result.yaml +++ /dev/null @@ -1,908 +0,0 @@ ---- -# Source: cilium/templates/cilium-agent/serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: "cilium" - namespace: kube-system ---- -# Source: cilium/templates/cilium-operator/serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: "cilium-operator" - namespace: kube-system ---- -# Source: cilium/templates/cilium-configmap.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: cilium-config - namespace: kube-system -data: - - # Identity allocation mode selects how identities are shared between cilium - # nodes by setting how they are stored. The options are "crd" or "kvstore". - # - "crd" stores identities in kubernetes as CRDs (custom resource definition). - # These can be queried with: - # kubectl get ciliumid - # - "kvstore" stores identities in an etcd kvstore, that is - # configured below. Cilium versions before 1.6 supported only the kvstore - # backend. Upgrades from these older cilium versions should continue using - # the kvstore by commenting out the identity-allocation-mode below, or - # setting it to "kvstore". 
- identity-allocation-mode: crd - cilium-endpoint-gc-interval: "5m0s" - nodes-gc-interval: "5m0s" - skip-cnp-status-startup-clean: "false" - # Disable the usage of CiliumEndpoint CRD - disable-endpoint-crd: "false" - - # If you want to run cilium in debug mode change this value to true - debug: "false" - # The agent can be put into the following three policy enforcement modes - # default, always and never. - # https://docs.cilium.io/en/latest/policy/intro/#policy-enforcement-modes - enable-policy: "default" - # If you want metrics enabled in all of your Cilium agents, set the port for - # which the Cilium agents will have their metrics exposed. - # This option deprecates the "prometheus-serve-addr" in the - # "cilium-metrics-config" ConfigMap - # NOTE that this will open the port on ALL nodes where Cilium pods are - # scheduled. - prometheus-serve-addr: ":9962" - # Port to expose Envoy metrics (e.g. "9964"). Envoy metrics listener will be disabled if this - # field is not set. - proxy-prometheus-port: "9964" - - # Enable IPv4 addressing. If enabled, all endpoints are allocated an IPv4 - # address. - enable-ipv4: "true" - - # Enable IPv6 addressing. If enabled, all endpoints are allocated an IPv6 - # address. - enable-ipv6: "true" - # Users who wish to specify their own custom CNI configuration file must set - # custom-cni-conf to "true", otherwise Cilium may overwrite the configuration. - custom-cni-conf: "false" - enable-bpf-clock-probe: "true" - # If you want cilium monitor to aggregate tracing for packets, set this level - # to "low", "medium", or "maximum". The higher the level, the less packets - # that will be seen in monitor output. - monitor-aggregation: medium - - # The monitor aggregation interval governs the typical time between monitor - # notification events for each allowed connection. - # - # Only effective when monitor aggregation is set to "medium" or higher. - monitor-aggregation-interval: 5s - - # The monitor aggregation flags determine which TCP flags which, upon the - # first observation, cause monitor notifications to be generated. - # - # Only effective when monitor aggregation is set to "medium" or higher. - monitor-aggregation-flags: all - # Specifies the ratio (0.0-1.0) of total system memory to use for dynamic - # sizing of the TCP CT, non-TCP CT, NAT and policy BPF maps. - bpf-map-dynamic-size-ratio: "0.0025" - # bpf-policy-map-max specifies the maximum number of entries in endpoint - # policy map (per endpoint) - bpf-policy-map-max: "16384" - # bpf-lb-map-max specifies the maximum number of entries in bpf lb service, - # backend and affinity maps. - bpf-lb-map-max: "65536" - # bpf-lb-bypass-fib-lookup instructs Cilium to enable the FIB lookup bypass - # optimization for nodeport reverse NAT handling. - bpf-lb-external-clusterip: "false" - - # Pre-allocation of map entries allows per-packet latency to be reduced, at - # the expense of up-front memory allocation for the entries in the maps. The - # default value below will minimize memory usage in the default installation; - # users who are sensitive to latency may consider setting this to "true". - # - # This option was introduced in Cilium 1.4. Cilium 1.3 and earlier ignore - # this option and behave as though it is set to "true". - # - # If this value is modified, then during the next Cilium startup the restore - # of existing endpoints and tracking of ongoing connections may be disrupted. - # As a result, reply packets may be dropped and the load-balancing decisions - # for established connections may change. 
- # - # If this option is set to "false" during an upgrade from 1.3 or earlier to - # 1.4 or later, then it may cause one-time disruptions during the upgrade. - preallocate-bpf-maps: "false" - - # Regular expression matching compatible Istio sidecar istio-proxy - # container image names - sidecar-istio-proxy-image: "cilium/istio_proxy" - - # Name of the cluster. Only relevant when building a mesh of clusters. - cluster-name: default - # Unique ID of the cluster. Must be unique across all conneted clusters and - # in the range of 1 and 255. Only relevant when building a mesh of clusters. - cluster-id: "0" - - # Encapsulation mode for communication between nodes - # Possible values: - # - disabled - # - vxlan (default) - # - geneve - tunnel: "vxlan" - # Enables L7 proxy for L7 policy enforcement and visibility - enable-l7-proxy: "true" - - enable-ipv4-masquerade: "true" - enable-ipv6-masquerade: "true" - enable-bpf-masquerade: "false" - - enable-xt-socket-fallback: "true" - install-iptables-rules: "true" - install-no-conntrack-iptables-rules: "false" - - auto-direct-node-routes: "false" - enable-local-redirect-policy: "true" - enable-host-firewall: "true" - # List of devices used to attach bpf_host.o (implements BPF NodePort, - # host-firewall and BPF masquerading) - devices: "eth+" - - kube-proxy-replacement: "strict" - kube-proxy-replacement-healthz-bind-address: "" - bpf-lb-sock: "false" - host-reachable-services-protos: - enable-health-check-nodeport: "true" - node-port-bind-protection: "true" - enable-auto-protect-node-port-range: "true" - enable-svc-source-range-check: "true" - enable-l2-neigh-discovery: "true" - arping-refresh-period: "30s" - k8s-require-ipv4-pod-cidr: "true" - k8s-require-ipv6-pod-cidr: "true" - cni-uninstall: "true" - enable-endpoint-health-checking: "true" - enable-health-checking: "true" - enable-well-known-identities: "false" - enable-remote-node-identity: "true" - synchronize-k8s-nodes: "true" - operator-api-serve-addr: "127.0.0.1:9234" - ipam: "kubernetes" - disable-cnp-status-updates: "true" - enable-vtep: "false" - vtep-endpoint: "" - vtep-cidr: "" - vtep-mask: "" - vtep-mac: "" - enable-k8s-endpoint-slice: "true" - enable-bgp-control-plane: "false" - bpf-root: "/sys/fs/bpf" - cgroup-root: "/sys/fs/cgroup" - enable-k8s-terminating-endpoint: "true" - remove-cilium-node-taints: "true" - set-cilium-is-up-condition: "true" - unmanaged-pod-watcher-interval: "15" - tofqdns-dns-reject-response-code: "refused" - tofqdns-enable-dns-compression: "true" - tofqdns-endpoint-max-ip-per-hostname: "50" - tofqdns-idle-connection-grace-period: "0s" - tofqdns-max-deferred-connection-deletes: "10000" - tofqdns-min-ttl: "3600" - tofqdns-proxy-response-max-delay: "100ms" - agent-not-ready-taint-key: "node.cilium.io/agent-not-ready" ---- -# Source: cilium/templates/cilium-agent/clusterrole.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: cilium -rules: -- apiGroups: - - networking.k8s.io - resources: - - networkpolicies - verbs: - - get - - list - - watch -- apiGroups: - - discovery.k8s.io - resources: - - endpointslices - verbs: - - get - - list - - watch -- apiGroups: - - "" - resources: - - namespaces - - services - - pods - - endpoints - - nodes - verbs: - - get - - list - - watch -- apiGroups: - - apiextensions.k8s.io - resources: - - customresourcedefinitions - verbs: - - list - - watch - # This is used when validating policies in preflight. 
This will need to stay - # until we figure out how to avoid "get" inside the preflight, and then - # should be removed ideally. - - get -- apiGroups: - - cilium.io - resources: - - ciliumbgploadbalancerippools - - ciliumbgppeeringpolicies - - ciliumclusterwideenvoyconfigs - - ciliumclusterwidenetworkpolicies - - ciliumegressgatewaypolicies - - ciliumegressnatpolicies - - ciliumendpoints - - ciliumendpointslices - - ciliumenvoyconfigs - - ciliumidentities - - ciliumlocalredirectpolicies - - ciliumnetworkpolicies - - ciliumnodes - verbs: - - list - - watch -- apiGroups: - - cilium.io - resources: - - ciliumidentities - - ciliumendpoints - - ciliumnodes - verbs: - - create -- apiGroups: - - cilium.io - # To synchronize garbage collection of such resources - resources: - - ciliumidentities - verbs: - - update -- apiGroups: - - cilium.io - resources: - - ciliumendpoints - verbs: - - delete - - get -- apiGroups: - - cilium.io - resources: - - ciliumnodes - - ciliumnodes/status - verbs: - - get - - update -- apiGroups: - - cilium.io - resources: - - ciliumnetworkpolicies/status - - ciliumclusterwidenetworkpolicies/status - - ciliumendpoints/status - - ciliumendpoints - verbs: - - patch ---- -# Source: cilium/templates/cilium-operator/clusterrole.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: cilium-operator -rules: -- apiGroups: - - "" - resources: - - pods - verbs: - - get - - list - - watch - # to automatically delete [core|kube]dns pods so that are starting to being - # managed by Cilium - - delete -- apiGroups: - - "" - resources: - - nodes - verbs: - - list - - watch -- apiGroups: - - "" - resources: - # To remove node taints - - nodes - # To set NetworkUnavailable false on startup - - nodes/status - verbs: - - patch -- apiGroups: - - discovery.k8s.io - resources: - - endpointslices - verbs: - - get - - list - - watch -- apiGroups: - - "" - resources: - # to perform LB IP allocation for BGP - - services/status - verbs: - - update -- apiGroups: - - "" - resources: - # to check apiserver connectivity - - namespaces - verbs: - - get - - list - - watch -- apiGroups: - - "" - resources: - # to perform the translation of a CNP that contains `ToGroup` to its endpoints - - services - - endpoints - verbs: - - get - - list - - watch -- apiGroups: - - cilium.io - resources: - - ciliumnetworkpolicies - - ciliumclusterwidenetworkpolicies - verbs: - # Create auto-generated CNPs and CCNPs from Policies that have 'toGroups' - - create - - update - - deletecollection - # To update the status of the CNPs and CCNPs - - patch - - get - - list - - watch -- apiGroups: - - cilium.io - resources: - - ciliumnetworkpolicies/status - - ciliumclusterwidenetworkpolicies/status - verbs: - # Update the auto-generated CNPs and CCNPs status. 
- - patch - - update -- apiGroups: - - cilium.io - resources: - - ciliumendpoints - - ciliumidentities - verbs: - # To perform garbage collection of such resources - - delete - - list - - watch -- apiGroups: - - cilium.io - resources: - - ciliumidentities - verbs: - # To synchronize garbage collection of such resources - - update -- apiGroups: - - cilium.io - resources: - - ciliumnodes - verbs: - - create - - update - - get - - list - - watch - # To perform CiliumNode garbage collector - - delete -- apiGroups: - - cilium.io - resources: - - ciliumnodes/status - verbs: - - update -- apiGroups: - - cilium.io - resources: - - ciliumendpointslices - - ciliumenvoyconfigs - verbs: - - create - - update - - get - - list - - watch - - delete -- apiGroups: - - apiextensions.k8s.io - resources: - - customresourcedefinitions - verbs: - - create - - get - - list - - watch -- apiGroups: - - apiextensions.k8s.io - resources: - - customresourcedefinitions - verbs: - - update - resourceNames: - - ciliumbgploadbalancerippools.cilium.io - - ciliumbgppeeringpolicies.cilium.io - - ciliumclusterwideenvoyconfigs.cilium.io - - ciliumclusterwidenetworkpolicies.cilium.io - - ciliumegressgatewaypolicies.cilium.io - - ciliumegressnatpolicies.cilium.io - - ciliumendpoints.cilium.io - - ciliumendpointslices.cilium.io - - ciliumenvoyconfigs.cilium.io - - ciliumexternalworkloads.cilium.io - - ciliumidentities.cilium.io - - ciliumlocalredirectpolicies.cilium.io - - ciliumnetworkpolicies.cilium.io - - ciliumnodes.cilium.io -# For cilium-operator running in HA mode. -# -# Cilium operator running in HA mode requires the use of ResourceLock for Leader Election -# between multiple running instances. -# The preferred way of doing this is to use LeasesResourceLock as edits to Leases are less -# common and fewer objects in the cluster watch "all Leases". 
-- apiGroups: - - coordination.k8s.io - resources: - - leases - verbs: - - create - - get - - update ---- -# Source: cilium/templates/cilium-agent/clusterrolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: cilium -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: cilium -subjects: -- kind: ServiceAccount - name: "cilium" - namespace: kube-system ---- -# Source: cilium/templates/cilium-operator/clusterrolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: cilium-operator -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: cilium-operator -subjects: -- kind: ServiceAccount - name: "cilium-operator" - namespace: kube-system ---- -# Source: cilium/templates/cilium-agent/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: cilium-agent - namespace: kube-system - annotations: - prometheus.io/scrape: "true" - prometheus.io/port: "9964" - labels: - k8s-app: cilium -spec: - clusterIP: None - type: ClusterIP - selector: - k8s-app: cilium - ports: - - name: envoy-metrics - port: 9964 - protocol: TCP - targetPort: envoy-metrics ---- -# Source: cilium/templates/cilium-agent/daemonset.yaml -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: cilium - namespace: kube-system - labels: - k8s-app: cilium -spec: - selector: - matchLabels: - k8s-app: cilium - updateStrategy: - rollingUpdate: - maxUnavailable: 2 - type: RollingUpdate - template: - metadata: - annotations: - prometheus.io/port: "9962" - prometheus.io/scrape: "true" - labels: - k8s-app: cilium - spec: - containers: - - name: cilium-agent - image: "quay.io/cilium/cilium:v1.12.8@sha256:b6c3c48b380334b8f08dba6e0c28d906c0d722b8c2beb0d506b3cea27f66f78d" - imagePullPolicy: IfNotPresent - command: - - cilium-agent - args: - - --config-dir=/tmp/cilium/config-map - startupProbe: - httpGet: - host: "127.0.0.1" - path: /healthz - port: 9879 - scheme: HTTP - httpHeaders: - - name: "brief" - value: "true" - failureThreshold: 105 - periodSeconds: 2 - successThreshold: 1 - livenessProbe: - httpGet: - host: "127.0.0.1" - path: /healthz - port: 9879 - scheme: HTTP - httpHeaders: - - name: "brief" - value: "true" - periodSeconds: 30 - successThreshold: 1 - failureThreshold: 10 - timeoutSeconds: 5 - readinessProbe: - httpGet: - host: "127.0.0.1" - path: /healthz - port: 9879 - scheme: HTTP - httpHeaders: - - name: "brief" - value: "true" - periodSeconds: 30 - successThreshold: 1 - failureThreshold: 3 - timeoutSeconds: 5 - env: - - name: K8S_NODE_NAME - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: spec.nodeName - - name: CILIUM_K8S_NAMESPACE - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: metadata.namespace - - name: CILIUM_CLUSTERMESH_CONFIG - value: /var/lib/cilium/clustermesh/ - - name: CILIUM_CNI_CHAINING_MODE - valueFrom: - configMapKeyRef: - name: cilium-config - key: cni-chaining-mode - optional: true - - name: CILIUM_CUSTOM_CNI_CONF - valueFrom: - configMapKeyRef: - name: cilium-config - key: custom-cni-conf - optional: true - - name: KUBERNETES_SERVICE_HOST - value: "api.cluster.local" - - name: KUBERNETES_SERVICE_PORT - value: "6443" - lifecycle: - postStart: - exec: - command: - - "/cni-install.sh" - - "--enable-debug=false" - - "--cni-exclusive=true" - - "--log-file=/var/run/cilium/cilium-cni.log" - preStop: - exec: - command: - - /cni-uninstall.sh - resources: - limits: - cpu: 2 - memory: 2Gi - requests: - cpu: 100m - memory: 128Mi - ports: - - name: peer-service - 
containerPort: 4244 - hostPort: 4244 - protocol: TCP - - name: prometheus - containerPort: 9962 - hostPort: 9962 - protocol: TCP - - name: envoy-metrics - containerPort: 9964 - hostPort: 9964 - protocol: TCP - securityContext: - privileged: true - terminationMessagePolicy: FallbackToLogsOnError - volumeMounts: - - name: bpf-maps - mountPath: /sys/fs/bpf - mountPropagation: Bidirectional - # Check for duplicate mounts before mounting - - name: cilium-cgroup - mountPath: /sys/fs/cgroup - - name: cilium-run - mountPath: /var/run/cilium - - name: etc-cni-netd - mountPath: /host/etc/cni/net.d - - name: clustermesh-secrets - mountPath: /var/lib/cilium/clustermesh - readOnly: true - - name: cilium-config-path - mountPath: /tmp/cilium/config-map - readOnly: true - # Needed to be able to load kernel modules - - name: lib-modules - mountPath: /lib/modules - readOnly: true - - name: xtables-lock - mountPath: /run/xtables.lock - initContainers: - - name: clean-cilium-state - image: "quay.io/cilium/cilium:v1.12.8@sha256:b6c3c48b380334b8f08dba6e0c28d906c0d722b8c2beb0d506b3cea27f66f78d" - imagePullPolicy: IfNotPresent - command: - - /init-container.sh - env: - - name: CILIUM_ALL_STATE - valueFrom: - configMapKeyRef: - name: cilium-config - key: clean-cilium-state - optional: true - - name: CILIUM_BPF_STATE - valueFrom: - configMapKeyRef: - name: cilium-config - key: clean-cilium-bpf-state - optional: true - - name: KUBERNETES_SERVICE_HOST - value: "api.cluster.local" - - name: KUBERNETES_SERVICE_PORT - value: "6443" - terminationMessagePolicy: FallbackToLogsOnError - securityContext: - privileged: true - volumeMounts: - - name: bpf-maps - mountPath: /sys/fs/bpf - # Required to mount cgroup filesystem from the host to cilium agent pod - - name: cilium-cgroup - mountPath: /sys/fs/cgroup - mountPropagation: HostToContainer - - name: cilium-run - mountPath: /var/run/cilium - resources: - requests: - cpu: 100m - memory: 100Mi # wait-for-kube-proxy - # Install the CNI binaries in an InitContainer so we don't have a writable host mount in the agent - - name: install-cni-binaries - image: "quay.io/cilium/cilium:v1.12.8@sha256:b6c3c48b380334b8f08dba6e0c28d906c0d722b8c2beb0d506b3cea27f66f78d" - imagePullPolicy: IfNotPresent - command: - - "/install-plugin.sh" - resources: - requests: - cpu: 100m - memory: 10Mi - securityContext: - capabilities: - drop: - - ALL - terminationMessagePolicy: FallbackToLogsOnError - volumeMounts: - - name: cni-path - mountPath: /host/opt/cni/bin - restartPolicy: Always - priorityClassName: system-node-critical - serviceAccount: "cilium" - serviceAccountName: "cilium" - automountServiceAccountToken: true - terminationGracePeriodSeconds: 1 - hostNetwork: true - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchLabels: - k8s-app: cilium - topologyKey: kubernetes.io/hostname - nodeSelector: - kubernetes.io/os: linux - tolerations: - - operator: Exists - volumes: - # To keep state between restarts / upgrades - - name: cilium-run - hostPath: - path: /var/run/cilium - type: DirectoryOrCreate - # To keep state between restarts / upgrades for bpf maps - - name: bpf-maps - hostPath: - path: /sys/fs/bpf - type: DirectoryOrCreate - # To keep state between restarts / upgrades for cgroup2 filesystem - - name: cilium-cgroup - hostPath: - path: /sys/fs/cgroup - type: DirectoryOrCreate - # To install cilium cni plugin in the host - - name: cni-path - hostPath: - path: /opt/cni/bin - type: DirectoryOrCreate - # To install cilium cni configuration in 
the host - - name: etc-cni-netd - hostPath: - path: /etc/cni/net.d - type: DirectoryOrCreate - # To be able to load kernel modules - - name: lib-modules - hostPath: - path: /lib/modules - # To access iptables concurrently with other processes (e.g. kube-proxy) - - name: xtables-lock - hostPath: - path: /run/xtables.lock - type: FileOrCreate - # To read the clustermesh configuration - - name: clustermesh-secrets - secret: - secretName: cilium-clustermesh - # note: the leading zero means this number is in octal representation: do not remove it - defaultMode: 0400 - optional: true - # To read the configuration from the config map - - name: cilium-config-path - configMap: - name: cilium-config ---- -# Source: cilium/templates/cilium-operator/deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: cilium-operator - namespace: kube-system - labels: - io.cilium/app: operator - name: cilium-operator -spec: - # See docs on ServerCapabilities.LeasesResourceLock in file pkg/k8s/version/version.go - # for more details. - replicas: 1 - selector: - matchLabels: - io.cilium/app: operator - name: cilium-operator - strategy: - rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 - type: RollingUpdate - template: - metadata: - annotations: - # ensure pods roll when configmap updates - cilium.io/cilium-configmap-checksum: "8355b2f8d6da37eedd17661ef948fedfa489901c3e7274ed0a0c586a424b71cb" - labels: - io.cilium/app: operator - name: cilium-operator - spec: - containers: - - name: cilium-operator - image: "quay.io/cilium/operator-generic:v1.12.8@sha256:7431f0c2001fb875b1a8901e103825394c38cd6c63a1435a3273ed20ae0e7578" - imagePullPolicy: IfNotPresent - command: - - cilium-operator-generic - args: - - --config-dir=/tmp/cilium/config-map - - --debug=$(CILIUM_DEBUG) - env: - - name: K8S_NODE_NAME - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: spec.nodeName - - name: CILIUM_K8S_NAMESPACE - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: metadata.namespace - - name: CILIUM_DEBUG - valueFrom: - configMapKeyRef: - key: debug - name: cilium-config - optional: true - - name: KUBERNETES_SERVICE_HOST - value: "api.cluster.local" - - name: KUBERNETES_SERVICE_PORT - value: "6443" - livenessProbe: - httpGet: - host: "127.0.0.1" - path: /healthz - port: 9234 - scheme: HTTP - initialDelaySeconds: 60 - periodSeconds: 10 - timeoutSeconds: 3 - volumeMounts: - - name: cilium-config-path - mountPath: /tmp/cilium/config-map - readOnly: true - terminationMessagePolicy: FallbackToLogsOnError - hostNetwork: true - restartPolicy: Always - priorityClassName: system-cluster-critical - serviceAccount: "cilium-operator" - serviceAccountName: "cilium-operator" - automountServiceAccountToken: true - # In HA mode, cilium-operator pods must not be scheduled on the same - # node as they will clash with each other. 
- affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchLabels: - io.cilium/app: operator - topologyKey: kubernetes.io/hostname - nodeSelector: - kubernetes.io/os: linux - node-role.kubernetes.io/control-plane: "" - tolerations: - - effect: NoSchedule - operator: Exists - volumes: - # To read the configuration from the config map - - name: cilium-config-path - configMap: - name: cilium-config diff --git a/proxmox/deployments/cilium.yaml b/proxmox/deployments/cilium.yaml deleted file mode 100644 index 12ea06a..0000000 --- a/proxmox/deployments/cilium.yaml +++ /dev/null @@ -1,77 +0,0 @@ ---- - -k8sServiceHost: "api.cluster.local" -k8sServicePort: "6443" - -operator: - enabled: true - rollOutPods: true - replicas: 1 - prometheus: - enabled: false - nodeSelector: - node-role.kubernetes.io/control-plane: "" - tolerations: - - operator: Exists - effect: NoSchedule - -identityAllocationMode: crd -kubeProxyReplacement: strict -enableK8sEndpointSlice: true -localRedirectPolicy: true - -tunnel: "vxlan" -autoDirectNodeRoutes: false -devices: [eth+] - -healthChecking: true - -cni: - install: true - -ipam: - mode: "kubernetes" -k8s: - requireIPv4PodCIDR: true - requireIPv6PodCIDR: true - -bpf: - masquerade: false -ipv4: - enabled: true -ipv6: - enabled: true -hostServices: - enabled: true -hostPort: - enabled: true -nodePort: - enabled: true -externalIPs: - enabled: true -hostFirewall: - enabled: true -ingressController: - enabled: false - -securityContext: - privileged: true - -hubble: - enabled: false - -prometheus: - enabled: true - -cgroup: - autoMount: - enabled: false - hostRoot: /sys/fs/cgroup - -resources: - limits: - cpu: 2 - memory: 2Gi - requests: - cpu: 100m - memory: 128Mi diff --git a/proxmox/deployments/pod-ephemeral.yaml b/proxmox/deployments/pod-ephemeral.yaml index ec7f290..b386aee 100644 --- a/proxmox/deployments/pod-ephemeral.yaml +++ b/proxmox/deployments/pod-ephemeral.yaml @@ -38,7 +38,7 @@ spec: type: pvc-volume spec: accessModes: [ "ReadWriteOnce" ] - storageClassName: proxmox-zfs + storageClassName: proxmox resources: requests: storage: 5Gi diff --git a/proxmox/deployments/proxmox-csi.yaml b/proxmox/deployments/proxmox-csi.yaml deleted file mode 100644 index 467ab80..0000000 --- a/proxmox/deployments/proxmox-csi.yaml +++ /dev/null @@ -1,41 +0,0 @@ - -controller: - plugin: - image: - pullPolicy: Always - tag: edge - -node: - plugin: - image: - pullPolicy: Always - tag: edge - - nodeSelector: - node.cloudprovider.kubernetes.io/platform: nocloud - tolerations: - - operator: Exists - -nodeSelector: - node-role.kubernetes.io/control-plane: "" -tolerations: - - key: node-role.kubernetes.io/control-plane - effect: NoSchedule - -config: - clusters: - - region: "dev-1" - token_id: "root@pam!terraform" - token_secret: "cb6e5561-ce10-4e7e-8b99-155ff6371a48" - url: "https://192.168.10.4:8006/api2/json" - insecure: true - -storageClass: - - name: proxmox - storage: local-lvm - reclaimPolicy: Delete - fstype: xfs - - name: proxmox-zfs - storage: zfs - reclaimPolicy: Delete - fstype: xfs diff --git a/proxmox/deployments/talos-ccm.yaml b/proxmox/deployments/talos-ccm.yaml new file mode 100644 index 0000000..10643ab --- /dev/null +++ b/proxmox/deployments/talos-ccm.yaml @@ -0,0 +1,38 @@ + +service: + containerPort: 50258 + annotations: + prometheus.io/scrape: "true" + prometheus.io/scheme: "https" + prometheus.io/port: "50258" + +logVerbosityLevel: 4 + +transformations: + - name: web + nodeSelector: + - matchExpressions: + - key: hostname + 
operator: Regexp + values: + - ^web-.+$ + labels: + node-role.kubernetes.io/web: "" + - name: worker + nodeSelector: + - matchExpressions: + - key: hostname + operator: Regexp + values: + - ^worker-.+$ + labels: + node-role.kubernetes.io/worker: "" + - name: db + nodeSelector: + - matchExpressions: + - key: hostname + operator: Regexp + values: + - ^db-.+$ + labels: + node-role.kubernetes.io/db: "" diff --git a/proxmox/deployments/test-statefulset.yaml b/proxmox/deployments/test-statefulset.yaml index be27f8c..4d94de8 100644 --- a/proxmox/deployments/test-statefulset.yaml +++ b/proxmox/deployments/test-statefulset.yaml @@ -21,6 +21,16 @@ spec: nodeSelector: # kubernetes.io/hostname: kube-21 # topology.kubernetes.io/zone: hvm-1 + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - preference: + matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - controlplane-41 + weight: 100 containers: - name: alpine image: alpine @@ -46,4 +56,4 @@ spec: resources: requests: storage: 5Gi - storageClassName: proxmox-zfs + storageClassName: proxmox diff --git a/proxmox/deployments/ubuntu.yaml b/proxmox/deployments/ubuntu.yaml new file mode 100644 index 0000000..848875d --- /dev/null +++ b/proxmox/deployments/ubuntu.yaml @@ -0,0 +1,49 @@ +apiVersion: v1 +kind: Pod +metadata: + name: ubuntu + namespace: kube-system +spec: + hostname: ubuntu + hostPID: true + hostNetwork: true + nodeSelector: + kubernetes.io/hostname: controlplane-41 + containers: + - image: ubuntu + command: + - sleep + - "14d" + name: ubuntu + securityContext: + privileged: true + volumeMounts: + - name: dev + mountPath: /dev + - name: root + mountPath: /mnt/root + readOnly: true + - mountPath: /lib/modules + name: lib-modules + readOnly: true + - name: tmp + mountPath: /tmp + priorityClassName: system-node-critical + tolerations: + - operator: Exists + - key: node.kubernetes.io/disk-pressure + operator: Exists + effect: NoSchedule + volumes: + - name: dev + hostPath: + path: /dev + - name: root + hostPath: + path: / + - hostPath: + path: /lib/modules + name: lib-modules + - name: tmp + emptyDir: + medium: Memory diff --git a/proxmox/images/.gitignore b/proxmox/images/.gitignore deleted file mode 100644 index aa3bb53..0000000 --- a/proxmox/images/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.qcow2.xz -*.qcow2 diff --git a/proxmox/images/Makefile b/proxmox/images/Makefile deleted file mode 100644 index 30f0add..0000000 --- a/proxmox/images/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -# -REGISTRY ?= ghcr.io/siderolabs -TAG ?= 1.5.4 - -clean: - rm -f nocloud-*.qcow2.xz - rm -f nocloud-*.qcow2 - -init: - packer init -upgrade . - -release: - packer build -only=release.proxmox.talos . - -images: clean - docker run --rm -i -v /dev:/dev --privileged $(REGISTRY)/imager:v$(TAG) oracle \ - --extra-kernel-arg talos.dashboard.disabled=1 --platform nocloud --arch amd64 --tar-to-stdout | tar xz - xz -d nocloud-amd64.qcow2.xz diff --git a/proxmox/images/README.md b/proxmox/images/README.md deleted file mode 100644 index cc8f377..0000000 --- a/proxmox/images/README.md +++ /dev/null @@ -1,54 +0,0 @@ -# Upload Talos image - -We will use `nocloud` image. - -## Method 1: packer - -``` -make init -make release -``` - -## Method 1: manual - -Create the VM, config example: - -```yaml -agent: 0 -boot: order=scsi0;ide2;net0 -cores: 1 -cpu: host -kvm: 1 -balloon: 0 -memory: 3072 -name: talos -net0: virtio=... 
-onboot: 0 -ostype: l26 -ide2: cdrom,media=cdrom -scsi0: local-lvm:vm-106-disk-0,size=32G -scsihw: virtio-scsi-single -serial0: socket -smbios1: uuid=... -numa: 0 -sockets: 1 -template: 1 -``` - -Find the name of system disk. -In example it - `local-lvm:vm-106-disk-0`, lvm volume `vm-106-disk-0` - -We copy Talos system disk to this volume. - -```shell -cd /tmp -wget https://github.com/siderolabs/talos/releases/download/v1.4.1/nocloud-amd64.raw.xz -xz -d -c nocloud-amd64.raw.xz | dd of=/dev/mapper/vg0-vm--106--disk--0 -``` - -And then, convert it to template. - -# Resources - -* https://developer.hashicorp.com/packer/plugins/builders/proxmox/iso -* https://wiki.archlinux.org/title/Arch_Linux_on_a_VPS diff --git a/proxmox/images/proxmox.pkr.hcl b/proxmox/images/proxmox.pkr.hcl deleted file mode 100644 index 03ed1ed..0000000 --- a/proxmox/images/proxmox.pkr.hcl +++ /dev/null @@ -1,80 +0,0 @@ - -packer { - required_plugins { - proxmox = { - version = ">= 1.1.3" - source = "github.com/hashicorp/proxmox" - } - } -} - -source "proxmox" "talos" { - proxmox_url = "https://${var.proxmox_host}:8006/api2/json" - username = var.proxmox_username - token = var.proxmox_token - node = var.proxmox_nodename - insecure_skip_tls_verify = true - - iso_file = "local:iso/archlinux-2023.05.03-x86_64.iso" - # iso_url = "https://mirror.rackspace.com/archlinux/iso/2023.05.03/archlinux-2023.05.03-x86_64.iso" - # iso_checksum = "sha1:3ae7c83eca8bd698b4e54c49d43e8de5dc8a4456" - # iso_storage_pool = "local" - unmount_iso = true - - network_adapters { - bridge = "vmbr0" - model = "virtio" - firewall = true - } - network_adapters { - bridge = "vmbr1" - model = "virtio" - } - - scsi_controller = "virtio-scsi-single" - disks { - type = "scsi" - storage_pool = var.proxmox_storage - format = "raw" - disk_size = "5G" - io_thread = "true" - cache_mode = "writethrough" - } - - cpu_type = "host" - memory = 3072 - # vga { - # type = "serial0" - # } - serials = ["socket"] - - ssh_username = "root" - ssh_password = "packer" - ssh_timeout = "15m" - qemu_agent = true - - # ssh_bastion_host = var.proxmox_host - # ssh_bastion_username = "root" - # ssh_bastion_agent_auth = true - - template_name = "talos" - template_description = "Talos system disk, version ${var.talos_version}" - - boot_wait = "15s" - boot_command = [ - "", - "passwdpackerpacker" - ] -} - -build { - name = "release" - sources = ["source.proxmox.talos"] - - provisioner "shell" { - inline = [ - "curl -L ${local.image} -o /tmp/talos.raw.xz", - "xz -d -c /tmp/talos.raw.xz | dd of=/dev/sda && sync", - ] - } -} diff --git a/proxmox/images/variables.pkr.hcl b/proxmox/images/variables.pkr.hcl deleted file mode 100644 index ccd5a4e..0000000 --- a/proxmox/images/variables.pkr.hcl +++ /dev/null @@ -1,33 +0,0 @@ - -variable "proxmox_host" { - type = string -} - -variable "proxmox_username" { - type = string -} - -variable "proxmox_token" { - type = string -} - -variable "proxmox_nodename" { - type = string -} - -variable "proxmox_storage" { - type = string -} - -variable "proxmox_storage_type" { - type = string -} - -variable "talos_version" { - type = string - default = "v1.4.1" -} - -locals { - image = "https://github.com/talos-systems/talos/releases/download/${var.talos_version}/nocloud-amd64.raw.xz" -} diff --git a/proxmox/init/outputs.tf b/proxmox/init/outputs.tf new file mode 100644 index 0000000..b8d3d7d --- /dev/null +++ b/proxmox/init/outputs.tf @@ -0,0 +1,10 @@ + +output "ccm" { + sensitive = true + value = proxmox_virtual_environment_user_token.ccm.value +} + +output 
"csi" { + sensitive = true + value = proxmox_virtual_environment_user_token.csi.value +} diff --git a/proxmox/init/users.tf b/proxmox/init/users.tf index 62cbe12..c8b28c3 100644 --- a/proxmox/init/users.tf +++ b/proxmox/init/users.tf @@ -1,9 +1,9 @@ -resource "random_password" "kubernetes" { - length = 16 - override_special = "_%@" - special = true -} +# resource "random_password" "kubernetes" { +# length = 16 +# override_special = "_%@" +# special = true +# } resource "proxmox_virtual_environment_user" "kubernetes" { acl { @@ -17,7 +17,35 @@ resource "proxmox_virtual_environment_user" "kubernetes" { role_id = proxmox_virtual_environment_role.csi.role_id } - comment = "Kubernetes" - password = random_password.kubernetes.result - user_id = "kubernetes@pve" + comment = "Kubernetes" + # password = random_password.kubernetes.result + user_id = "kubernetes@pve" +} + +resource "proxmox_virtual_environment_user_token" "ccm" { + comment = "Kubernetes CCM" + token_name = "ccm" + user_id = proxmox_virtual_environment_user.kubernetes.user_id +} + +resource "proxmox_virtual_environment_user_token" "csi" { + comment = "Kubernetes CSI" + token_name = "csi" + user_id = proxmox_virtual_environment_user.kubernetes.user_id +} + +resource "proxmox_virtual_environment_acl" "ccm" { + token_id = proxmox_virtual_environment_user_token.ccm.id + role_id = proxmox_virtual_environment_role.ccm.role_id + + path = "/" + propagate = true +} + +resource "proxmox_virtual_environment_acl" "csi" { + token_id = proxmox_virtual_environment_user_token.csi.id + role_id = proxmox_virtual_environment_role.csi.role_id + + path = "/" + propagate = true } diff --git a/proxmox/init/variables.tf b/proxmox/init/variables.tf index 5d690b7..4185cb7 100644 --- a/proxmox/init/variables.tf +++ b/proxmox/init/variables.tf @@ -5,11 +5,6 @@ variable "proxmox_host" { default = "192.168.1.1" } -variable "proxmox_nodename" { - description = "Proxmox node name" - type = string -} - variable "proxmox_token_id" { description = "Proxmox token id" type = string diff --git a/proxmox/init/versions.tf b/proxmox/init/versions.tf index a71a692..457649e 100644 --- a/proxmox/init/versions.tf +++ b/proxmox/init/versions.tf @@ -2,7 +2,7 @@ terraform { required_providers { proxmox = { source = "bpg/proxmox" - version = "0.18.2" + version = "0.60.0" } } required_version = ">= 1.0" diff --git a/proxmox/instances-controlplane.tf b/proxmox/instances-controlplane.tf index 0ffebd5..5ef7257 100644 --- a/proxmox/instances-controlplane.tf +++ b/proxmox/instances-controlplane.tf @@ -1,33 +1,44 @@ locals { controlplane_prefix = "controlplane" + controlplane_labels = "node-pool=controlplane" + controlplanes = { for k in flatten([ for zone in local.zones : [ for inx in range(lookup(try(var.controlplane[zone], {}), "count", 0)) : { id : lookup(try(var.controlplane[zone], {}), "id", 9000) + inx - name : "${local.controlplane_prefix}-${lower(substr(zone, -1, -1))}${1 + inx}" + name : "${local.controlplane_prefix}-${format("%02d", index(local.zones, zone))}${format("%x", 10 + inx)}" zone : zone - node_name : zone cpu : lookup(try(var.controlplane[zone], {}), "cpu", 1) mem : lookup(try(var.controlplane[zone], {}), "mem", 2048) - ip0 : lookup(try(var.controlplane[zone], {}), "ip0", "ip6=auto") - ipv4 : "${cidrhost(local.controlplane_subnet, index(local.zones, zone) + inx)}/24" - gwv4 : local.gwv4 + + hvv4 = cidrhost(local.subnets[zone], 0) + ipv4 : cidrhost(local.subnets[zone], -(2 + inx)) + gwv4 : cidrhost(local.subnets[zone], 0) + + ipv6ula : 
cidrhost(cidrsubnet(var.vpc_main_cidr[1], 16, index(local.zones, zone)), 512 + lookup(try(var.controlplane[zone], {}), "id", 9000) + inx) + ipv6 : cidrhost(cidrsubnet(lookup(try(var.nodes[zone], {}), "ip6", "fe80::/64"), 16, index(local.zones, zone)), 512 + lookup(try(var.controlplane[zone], {}), "id", 9000) + inx) + gwv6 : lookup(try(var.nodes[zone], {}), "gw6", "fe80::1") } ] ]) : k.name => k } + + controlplane_v4 = [for ip in local.controlplanes : ip.ipv4] + controlplane_v6 = [for ip in local.controlplanes : ip.ipv6] } -resource "null_resource" "controlplane_metadata" { - for_each = local.controlplanes - connection { - type = "ssh" - user = "root" - host = "${each.value.node_name}.${var.proxmox_domain}" - } +output "controlplanes" { + value = local.controlplanes +} - provisioner "file" { - content = templatefile("${path.module}/templates/metadata.yaml", { +resource "proxmox_virtual_environment_file" "controlplane_metadata" { + for_each = local.controlplanes + node_name = each.value.zone + content_type = "snippets" + datastore_id = "local" + + source_raw { + data = templatefile("${path.module}/templates/metadata.yaml", { hostname : each.value.name, id : each.value.id, providerID : "proxmox://${var.region}/${each.value.id}", @@ -35,98 +46,168 @@ resource "null_resource" "controlplane_metadata" { zone : each.value.zone, region : var.region, }) - destination = "/var/lib/vz/snippets/${each.value.name}.metadata.yaml" - } - - triggers = { - params = join(",", [for k, v in local.controlplanes[each.key] : "${k}-${v}"]) + file_name = "${each.value.name}.metadata.yaml" } } -resource "proxmox_vm_qemu" "controlplane" { +resource "proxmox_virtual_environment_vm" "controlplane" { for_each = local.controlplanes name = each.value.name - vmid = each.value.id - target_node = each.value.node_name - clone = var.proxmox_image + node_name = each.value.zone + vm_id = each.value.id + description = "Talos controlplane at ${var.region}" - agent = 0 - define_connection_info = false - os_type = "ubuntu" - qemu_os = "l26" - # ipconfig1 = each.value.ip0 - ipconfig0 = "ip=${each.value.ipv4},gw=${each.value.gwv4}" - cicustom = "meta=local:snippets/${each.value.name}.metadata.yaml" - cloudinit_cdrom_storage = var.proxmox_storage - - onboot = false - cpu = "host,flags=+aes" - sockets = 1 - cores = each.value.cpu - memory = each.value.mem - scsihw = "virtio-scsi-pci" - - vga { - memory = 0 - type = "serial0" + machine = "pc" + cpu { + architecture = "x86_64" + cores = each.value.cpu + sockets = 1 + numa = true + type = "host" } - serial { - id = 0 - type = "socket" + memory { + dedicated = each.value.mem } - network { - model = "virtio" - bridge = "vmbr0" - # firewall = true - } - # network { - # model = "virtio" - # bridge = "vmbr1" - # } - - boot = "order=scsi0" + scsi_hardware = "virtio-scsi-single" disk { - type = "scsi" - storage = var.proxmox_storage - size = "32G" - cache = "writethrough" - ssd = 1 - backup = false + datastore_id = var.nodes[each.value.zone].storage + interface = "scsi0" + iothread = true + cache = "none" + size = 50 + ssd = true + file_format = "raw" + } + clone { + vm_id = proxmox_virtual_environment_vm.template[each.value.zone].id + } + + initialization { + dns { + servers = ["1.1.1.1", "2001:4860:4860::8888", each.value.hvv4] + } + ip_config { + ipv6 { + address = "${each.value.ipv6}/64" + gateway = each.value.gwv6 + } + } + ip_config { + ipv4 { + address = "${each.value.ipv4}/24" + gateway = each.value.hvv4 + } + ipv6 { + address = "${each.value.ipv6ula}/64" + } + } + + datastore_id = 
var.nodes[each.value.zone].storage + meta_data_file_id = proxmox_virtual_environment_file.controlplane_metadata[each.key].id + } + + network_device { + bridge = "vmbr0" + queues = each.value.cpu + mtu = 1500 + mac_address = "32:90:${join(":", formatlist("%02X", split(".", each.value.ipv4)))}" + firewall = true + } + network_device { + bridge = "vmbr1" + queues = each.value.cpu + mtu = 1400 + firewall = false + } + + operating_system { + type = "l26" + } + tpm_state { + version = "v2.0" + datastore_id = var.nodes[each.value.zone].storage + } + + serial_device {} + vga { + type = "serial0" } lifecycle { ignore_changes = [ - boot, - network, - desc, - numa, - agent, - ipconfig0, - ipconfig1, - define_connection_info, + started, + ipv4_addresses, + ipv6_addresses, + network_interface_names, + initialization, + cpu, + memory, + disk, + clone, + network_device, ] } - depends_on = [null_resource.controlplane_metadata] + tags = [local.kubernetes["clusterName"]] + depends_on = [proxmox_virtual_environment_file.controlplane_metadata] +} + +resource "proxmox_virtual_environment_firewall_options" "controlplane" { + for_each = local.controlplanes + node_name = each.value.zone + vm_id = each.value.id + enabled = true + + dhcp = false + ipfilter = false + log_level_in = "nolog" + log_level_out = "nolog" + macfilter = false + ndp = false + input_policy = "DROP" + output_policy = "ACCEPT" + radv = true + + depends_on = [proxmox_virtual_environment_vm.controlplane] +} + +resource "proxmox_virtual_environment_firewall_rules" "controlplane" { + for_each = local.controlplanes + node_name = each.value.zone + vm_id = each.value.id + + dynamic "rule" { + for_each = { for idx, rule in split(",", var.security_groups["controlplane"]) : idx => rule } + content { + enabled = true + security_group = rule.value + } + } + + depends_on = [proxmox_virtual_environment_vm.controlplane, proxmox_virtual_environment_firewall_options.controlplane] } resource "local_sensitive_file" "controlplane" { for_each = local.controlplanes content = templatefile("${path.module}/templates/controlplane.yaml.tpl", - merge(var.kubernetes, { + merge(local.kubernetes, try(var.instances["all"], {}), { name = each.value.name - ipv4_vip = local.ipv4_vip - nodeSubnets = local.controlplane_subnet + labels = local.controlplane_labels + nodeSubnets = [local.subnets[each.value.zone], var.vpc_main_cidr[1]] + lbv4 = local.lbv4 + ipv4 = each.value.ipv4 + gwv4 = each.value.gwv4 + ipv6 = "${each.value.ipv6}/64" + gwv6 = each.value.gwv6 clusters = yamlencode({ - clusters = [ - { - token_id = var.proxmox_token_id - token_secret = var.proxmox_token_secret - url = "https://${var.proxmox_host}:8006/api2/json" - region = var.region - }, - ] + "clusters" : [{ + "url" : "https://${each.value.hvv4}:8006/api2/json", + "insecure" : true, + "token_id" : split("=", local.proxmox_token)[0], + "token_secret" : split("=", local.proxmox_token)[1], + "region" : var.region, + }] }) }) ) @@ -134,10 +215,10 @@ resource "local_sensitive_file" "controlplane" { file_permission = "0600" } -resource "null_resource" "controlplane" { - for_each = local.controlplanes - provisioner "local-exec" { - command = "echo talosctl apply-config --insecure --nodes ${split("/", each.value.ipv4)[0]} --config-patch @_cfgs/${each.value.name}.yaml --file _cfgs/controlplane.yaml" - } - depends_on = [proxmox_vm_qemu.controlplane, local_sensitive_file.controlplane] +locals { + controlplane_config = { for k, v in local.controlplanes : k => "talosctl apply-config --insecure --nodes ${v.ipv6} --config-patch 
@_cfgs/${v.name}.yaml --file _cfgs/controlplane.yaml" } +} + +output "controlplane_config" { + value = local.controlplane_config } diff --git a/proxmox/instances-db.tf b/proxmox/instances-db.tf new file mode 100644 index 0000000..65682c8 --- /dev/null +++ b/proxmox/instances-db.tf @@ -0,0 +1,230 @@ + +locals { + db_prefix = "db" + db_labels = "node-pool=db" + + dbs = { for k in flatten([ + for zone in local.zones : [ + for inx in range(lookup(try(var.instances[zone], {}), "db_count", 0)) : { + id : lookup(try(var.instances[zone], {}), "db_id", 9000) + inx + name : "${local.db_prefix}-${format("%02d", index(local.zones, zone))}${format("%x", 10 + inx)}" + zone : zone + cpu : lookup(try(var.instances[zone], {}), "db_cpu", 1) + cpus : lookup(try(var.instances[zone], {}), "db_affinity", "") != "" ? lookup(var.instances[zone], "db_affinity") : join(",", slice( + flatten(local.cpus[zone]), + 2 * data.proxmox_virtual_environment_node.node[zone].cpu_count - (inx + 1) * lookup(try(var.instances[zone], {}), "db_cpu", 1), + 2 * data.proxmox_virtual_environment_node.node[zone].cpu_count - inx * lookup(try(var.instances[zone], {}), "db_cpu", 1) + )) + numas : [0] + # range( + # length(local.cpu_numa[zone]) - (inx + 1) * lookup(try(var.instances[zone], {}), "db_numas", 1), + # length(local.cpu_numa[zone]) - inx * lookup(try(var.instances[zone], {}), "db_numas", 1) + # ) + mem : lookup(try(var.instances[zone], {}), "db_mem", 2048) + + hvv4 = cidrhost(local.subnets[zone], 0) + ipv4 : cidrhost(local.subnets[zone], 5 + inx) + gwv4 : cidrhost(local.subnets[zone], 0) + + ipv6ula : cidrhost(cidrsubnet(var.vpc_main_cidr[1], 16, index(local.zones, zone)), 512 + lookup(try(var.instances[zone], {}), "db_id", 9000) + inx) + ipv6 : cidrhost(cidrsubnet(lookup(try(var.nodes[zone], {}), "ip6", "fe80::/64"), 16, 1 + index(local.zones, zone)), 512 + lookup(try(var.instances[zone], {}), "db_id", 9000) + inx) + gwv6 : lookup(try(var.nodes[zone], {}), "gw6", "fe80::1") + } + ] + ]) : k.name => k } +} + +resource "proxmox_virtual_environment_file" "db_machineconfig" { + for_each = local.dbs + node_name = each.value.zone + content_type = "snippets" + datastore_id = "local" + + source_raw { + data = templatefile("${path.module}/templates/${lookup(var.instances[each.value.zone], "db_template", "worker.yaml.tpl")}", + merge(local.kubernetes, try(var.instances["all"], {}), { + labels = join(",", [local.web_labels, lookup(var.instances[each.value.zone], "db_labels", "")]) + nodeSubnets = [local.subnets[each.value.zone], var.vpc_main_cidr[1]] + lbv4 = local.lbv4 + ipv4 = each.value.ipv4 + gwv4 = each.value.gwv4 + hvv4 = each.value.hvv4 + ipv6 = "${each.value.ipv6}/64" + gwv6 = each.value.gwv6 + kernelArgs = [] + })) + file_name = "${each.value.name}.yaml" + } +} + +resource "proxmox_virtual_environment_file" "db_metadata" { + for_each = local.dbs + node_name = each.value.zone + content_type = "snippets" + datastore_id = "local" + + source_raw { + data = templatefile("${path.module}/templates/metadata.yaml", { + hostname : each.value.name, + id : each.value.id, + providerID : "proxmox://${var.region}/${each.value.id}", + type : "${each.value.cpu}VCPU-${floor(each.value.mem / 1024)}GB", + zone : each.value.zone, + region : var.region, + }) + file_name = "${each.value.name}.metadata.yaml" + } +} + +resource "proxmox_virtual_environment_vm" "db" { + for_each = local.dbs + name = each.value.name + node_name = each.value.zone + vm_id = each.value.id + description = "Talos database node" + + startup { + order = 5 + up_delay = 5 + } + + 
machine = "pc" + cpu { + architecture = "x86_64" + cores = each.value.cpu + affinity = each.value.cpus + sockets = 1 + numa = true + type = "host" + } + memory { + dedicated = each.value.mem + # hugepages = "1024" + # keep_hugepages = true + } + dynamic "numa" { + for_each = { for idx, numa in each.value.numas : numa => { + device = "numa${idx}" + cpus = "${idx * (each.value.cpu / length(each.value.numas))}-${(idx + 1) * (each.value.cpu / length(each.value.numas)) - 1}" + mem = each.value.mem / length(each.value.numas) + } } + content { + device = numa.value.device + cpus = numa.value.cpus + hostnodes = numa.key + memory = numa.value.mem + policy = "bind" + } + } + + scsi_hardware = "virtio-scsi-single" + disk { + datastore_id = lookup(try(var.nodes[each.value.zone], {}), "storage", "local") + interface = "scsi0" + iothread = true + cache = "none" + size = 32 + ssd = true + file_format = "raw" + } + clone { + vm_id = proxmox_virtual_environment_vm.template[each.value.zone].id + } + + initialization { + dns { + servers = [each.value.gwv4, "2001:4860:4860::8888"] + } + ip_config { + ipv6 { + address = "${each.value.ipv6}/64" + gateway = each.value.gwv6 + } + } + ip_config { + ipv4 { + address = "${each.value.ipv4}/24" + gateway = each.value.hvv4 + } + ipv6 { + address = "${each.value.ipv6ula}/64" + } + } + + datastore_id = "local" + meta_data_file_id = proxmox_virtual_environment_file.db_metadata[each.key].id + user_data_file_id = proxmox_virtual_environment_file.db_machineconfig[each.key].id + } + + network_device { + bridge = "vmbr0" + queues = each.value.cpu + mtu = 1500 + mac_address = "32:90:${join(":", formatlist("%02X", split(".", each.value.ipv4)))}" + firewall = true + } + network_device { + bridge = "vmbr1" + queues = each.value.cpu + mtu = 1400 + firewall = false + } + + operating_system { + type = "l26" + } + + serial_device {} + vga { + type = "serial0" + } + + lifecycle { + ignore_changes = [ + started, + clone, + ipv4_addresses, + ipv6_addresses, + network_interface_names, + initialization, + disk, + # memory, + # numa, + ] + } + + tags = [local.kubernetes["clusterName"]] + depends_on = [proxmox_virtual_environment_file.db_machineconfig] +} + +resource "proxmox_virtual_environment_firewall_options" "db" { + for_each = local.dbs + node_name = each.value.zone + vm_id = each.value.id + enabled = true + + dhcp = false + ipfilter = false + log_level_in = "nolog" + log_level_out = "nolog" + macfilter = false + ndp = true + input_policy = "DROP" + output_policy = "ACCEPT" + radv = false + + depends_on = [proxmox_virtual_environment_vm.db] +} + +resource "proxmox_virtual_environment_firewall_rules" "db" { + for_each = { for k, v in local.dbs : k => v if lookup(try(var.instances[v.zone], {}), "db_sg", "") != "" } + node_name = each.value.zone + vm_id = each.value.id + + rule { + enabled = true + security_group = lookup(var.instances[each.value.zone], "db_sg") + } + + depends_on = [proxmox_virtual_environment_vm.db, proxmox_virtual_environment_firewall_options.db] +} diff --git a/proxmox/instances-web.tf b/proxmox/instances-web.tf index 199076d..a882be4 100644 --- a/proxmox/instances-web.tf +++ b/proxmox/instances-web.tf @@ -1,61 +1,65 @@ locals { web_prefix = "web" - web_labels = "project.io/node-pool=web" + web_labels = "node-pool=web" webs = { for k in flatten([ for zone in local.zones : [ for inx in range(lookup(try(var.instances[zone], {}), "web_count", 0)) : { id : lookup(try(var.instances[zone], {}), "web_id", 9000) + inx - name : "${local.web_prefix}-${lower(substr(zone, -1, 
-1))}${1 + inx}" + name : "${local.web_prefix}-${format("%02d", index(local.zones, zone))}${format("%x", 10 + inx)}" zone : zone - node_name : zone cpu : lookup(try(var.instances[zone], {}), "web_cpu", 1) + cpus : join(",", slice( + flatten(local.cpus[zone]), + inx * lookup(try(var.instances[zone], {}), "web_cpu", 1), (inx + 1) * lookup(try(var.instances[zone], {}), "web_cpu", 1) + )) + numas : [0] # [inx] mem : lookup(try(var.instances[zone], {}), "web_mem", 2048) - ip0 : lookup(try(var.instances[zone], {}), "web_ip0", "ip6=auto") - ipv4 : "${cidrhost(local.subnets[zone], inx)}/24" - gwv4 : local.gwv4 + + hvv4 = cidrhost(local.subnets[zone], 0) + ipv4 : cidrhost(local.subnets[zone], 1 + inx) + gwv4 : cidrhost(local.subnets[zone], 0) + + ipv6ula : cidrhost(cidrsubnet(var.vpc_main_cidr[1], 16, index(local.zones, zone)), 256 + lookup(try(var.instances[zone], {}), "web_id", 9000) + inx) + ipv6 : cidrhost(cidrsubnet(lookup(try(var.nodes[zone], {}), "ip6", "fe80::/64"), 16, 1 + index(local.zones, zone)), 256 + lookup(try(var.instances[zone], {}), "web_id", 9000) + inx) + gwv6 : lookup(try(var.nodes[zone], {}), "gw6", "fe80::1") } ] ]) : k.name => k } } -resource "null_resource" "web_machineconfig" { - for_each = { for k, v in var.instances : k => v if lookup(try(var.instances[k], {}), "web_count", 0) > 0 } - connection { - type = "ssh" - user = "root" - host = "${each.key}.${var.proxmox_domain}" - } +resource "proxmox_virtual_environment_file" "web_machineconfig" { + for_each = local.webs + node_name = each.value.zone + content_type = "snippets" + datastore_id = "local" - provisioner "file" { - # source = "${path.module}/_cfgs/worker.yaml" - content = templatefile("${path.module}/templates/web.yaml.tpl", - merge(var.kubernetes, try(var.instances["all"], {}), { - lbv4 = local.ipv4_vip - nodeSubnets = var.vpc_main_cidr - clusterDns = cidrhost(split(",", var.kubernetes["serviceSubnets"])[0], 10) - labels = local.web_labels + source_raw { + data = templatefile("${path.module}/templates/${lookup(var.instances[each.value.zone], "web_template", "worker.yaml.tpl")}", + merge(local.kubernetes, try(var.instances["all"], {}), { + labels = join(",", [local.web_labels, lookup(var.instances[each.value.zone], "web_labels", "")]) + nodeSubnets = [local.subnets[each.value.zone], var.vpc_main_cidr[1]] + lbv4 = local.lbv4 + ipv4 = each.value.ipv4 + gwv4 = each.value.gwv4 + hvv4 = each.value.hvv4 + ipv6 = "${each.value.ipv6}/64" + gwv6 = each.value.gwv6 + kernelArgs = [] })) - - destination = "/var/lib/vz/snippets/${local.web_prefix}.yaml" - } - - triggers = { - params = filemd5("${path.module}/templates/web.yaml.tpl") + file_name = "${each.value.name}.yaml" } } -resource "null_resource" "web_metadata" { - for_each = local.webs - connection { - type = "ssh" - user = "root" - host = "${each.value.node_name}.${var.proxmox_domain}" - } +resource "proxmox_virtual_environment_file" "web_metadata" { + for_each = local.webs + node_name = each.value.zone + content_type = "snippets" + datastore_id = "local" - provisioner "file" { - content = templatefile("${path.module}/templates/metadata.yaml", { + source_raw { + data = templatefile("${path.module}/templates/metadata.yaml", { hostname : each.value.name, id : each.value.id, providerID : "proxmox://${var.region}/${each.value.id}", @@ -63,79 +67,181 @@ resource "null_resource" "web_metadata" { zone : each.value.zone, region : var.region, }) - destination = "/var/lib/vz/snippets/${each.value.name}.metadata.yaml" - } - - triggers = { - params = join(",", [for k, v in 
local.webs[each.key] : "${k}-${v}"]) + file_name = "${each.value.name}.metadata.yaml" } } -resource "proxmox_vm_qemu" "web" { +# resource "null_resource" "web_nlb_forward" { +# for_each = { for k, v in var.instances : k => v if lookup(try(var.instances[k], {}), "web_count", 0) > 0 } +# connection { +# type = "ssh" +# user = "root" +# host = "${each.key}.${var.proxmox_domain}" +# } + +# provisioner "file" { +# content = jsonencode(yamldecode(templatefile("${path.module}/templates/nlb_forward.tpl", { +# node : each.key +# webs : local.webs +# }))) +# destination = "/etc/ansible/facts.d/nlb_forward.fact" +# } + +# triggers = { +# params = filemd5("${path.module}/templates/nlb_forward.tpl") +# webs = md5(jsonencode([for w in local.webs : w.ipv4 if w.zone == each.key])) +# } +# } + +resource "proxmox_virtual_environment_vm" "web" { for_each = local.webs name = each.value.name - vmid = each.value.id - target_node = each.value.node_name - clone = var.proxmox_image + node_name = each.value.zone + vm_id = each.value.id + description = "Talos web node" - agent = 0 - define_connection_info = false - os_type = "ubuntu" - qemu_os = "l26" - # ipconfig0 = each.value.ip0 - ipconfig0 = "ip=${each.value.ipv4},gw=${each.value.gwv4}" - cicustom = "user=local:snippets/${local.web_prefix}.yaml,meta=local:snippets/${each.value.name}.metadata.yaml" - cloudinit_cdrom_storage = var.proxmox_storage - - onboot = false - cpu = "host,flags=+aes" - sockets = 1 - cores = each.value.cpu - memory = each.value.mem - numa = true - scsihw = "virtio-scsi-single" - - vga { - memory = 0 - type = "serial0" - } - serial { - id = 0 - type = "socket" + startup { + order = 3 + up_delay = 5 } - network { - model = "virtio" - bridge = "vmbr0" - firewall = true + machine = "pc" + cpu { + architecture = "x86_64" + cores = each.value.cpu + affinity = each.value.cpus + sockets = 1 + numa = true + type = "host" + } + memory { + dedicated = each.value.mem + # hugepages = "1024" + # keep_hugepages = true + } + dynamic "numa" { + for_each = { for idx, numa in each.value.numas : numa => { + device = "numa${idx}" + cpus = "0-${each.value.cpu - 1}" + mem = each.value.mem + } } + content { + device = numa.value.device + cpus = numa.value.cpus + hostnodes = numa.key + memory = numa.value.mem + policy = "bind" + } } - # network { - # model = "virtio" - # bridge = "vmbr1" - # } - boot = "order=scsi0" + scsi_hardware = "virtio-scsi-single" disk { - type = "scsi" - storage = var.proxmox_storage - size = "32G" - cache = "writethrough" - ssd = 1 - backup = false + datastore_id = lookup(try(var.nodes[each.value.zone], {}), "storage", "local") + interface = "scsi0" + iothread = true + ssd = true + cache = "none" + size = 32 + file_format = "raw" + } + clone { + vm_id = proxmox_virtual_environment_vm.template[each.value.zone].id + } + + initialization { + dns { + servers = [each.value.gwv4, "2001:4860:4860::8888"] + } + ip_config { + ipv6 { + address = "${each.value.ipv6}/64" + gateway = each.value.gwv6 + } + } + ip_config { + ipv4 { + address = "${each.value.ipv4}/24" + gateway = each.value.hvv4 + } + ipv6 { + address = "${each.value.ipv6ula}/64" + } + } + + datastore_id = "local" + meta_data_file_id = proxmox_virtual_environment_file.web_metadata[each.key].id + user_data_file_id = proxmox_virtual_environment_file.web_machineconfig[each.key].id + } + + network_device { + bridge = "vmbr0" + queues = each.value.cpu + mtu = 1500 + mac_address = "32:90:${join(":", formatlist("%02X", split(".", each.value.ipv4)))}" + firewall = true + } + network_device { + 
bridge = "vmbr1" + queues = each.value.cpu + mtu = 1400 + firewall = false + } + + operating_system { + type = "l26" + } + + serial_device {} + vga { + type = "serial0" } lifecycle { ignore_changes = [ - boot, - network, - desc, - numa, - agent, - ipconfig0, - ipconfig1, - define_connection_info, + started, + clone, + ipv4_addresses, + ipv6_addresses, + network_interface_names, + initialization, + disk, + # memory, + # numa, ] } - depends_on = [null_resource.web_machineconfig, null_resource.web_metadata] + tags = [local.kubernetes["clusterName"]] + depends_on = [proxmox_virtual_environment_file.web_machineconfig] +} + +resource "proxmox_virtual_environment_firewall_options" "web" { + for_each = local.webs + node_name = each.value.zone + vm_id = each.value.id + enabled = true + + dhcp = false + ipfilter = false + log_level_in = "nolog" + log_level_out = "nolog" + macfilter = false + ndp = true + input_policy = "DROP" + output_policy = "ACCEPT" + radv = false + + depends_on = [proxmox_virtual_environment_vm.web] +} + +resource "proxmox_virtual_environment_firewall_rules" "web" { + for_each = { for k, v in local.webs : k => v if lookup(try(var.instances[v.zone], {}), "web_sg", "") != "" } + node_name = each.value.zone + vm_id = each.value.id + + rule { + enabled = true + security_group = lookup(var.instances[each.value.zone], "web_sg") + } + + depends_on = [proxmox_virtual_environment_vm.web, proxmox_virtual_environment_firewall_options.web] } diff --git a/proxmox/instances-worker.tf b/proxmox/instances-worker.tf index e2bc572..3de21e0 100644 --- a/proxmox/instances-worker.tf +++ b/proxmox/instances-worker.tf @@ -1,59 +1,66 @@ locals { worker_prefix = "worker" - worker_labels = "project.io/node-pool=worker" + worker_labels = "node-pool=worker" workers = { for k in flatten([ for zone in local.zones : [ for inx in range(lookup(try(var.instances[zone], {}), "worker_count", 0)) : { id : lookup(try(var.instances[zone], {}), "worker_id", 9000) + inx - name : "${local.worker_prefix}-${lower(substr(zone, -1, -1))}${1 + inx}" + name : "${local.worker_prefix}-${format("%02d", index(local.zones, zone))}${format("%x", 10 + inx)}" zone : zone node_name : zone cpu : lookup(try(var.instances[zone], {}), "worker_cpu", 1) + cpus : join(",", slice( + flatten(local.cpus[zone]), + (inx + 2) * lookup(try(var.instances[zone], {}), "worker_cpu", 1), (inx + 3) * lookup(try(var.instances[zone], {}), "worker_cpu", 1) + )) + numas : [0] # [2 + inx] mem : lookup(try(var.instances[zone], {}), "worker_mem", 2048) - ip0 : lookup(try(var.instances[zone], {}), "worke_ip0", "ip6=auto") - ipv4 : "${cidrhost(local.subnets[zone], 4 + inx)}/24" - gwv4 : local.gwv4 + + hvv4 = cidrhost(local.subnets[zone], 0) + ipv4 : cidrhost(local.subnets[zone], 7 + inx) + gwv4 : cidrhost(local.subnets[zone], 0) + + ipv6ula : cidrhost(cidrsubnet(var.vpc_main_cidr[1], 16, index(local.zones, zone)), 384 + lookup(try(var.instances[zone], {}), "worker_id", 9000) + inx) + ipv6 : cidrhost(cidrsubnet(lookup(try(var.nodes[zone], {}), "ip6", "fe80::/64"), 16, 1 + index(local.zones, zone)), 384 + lookup(try(var.instances[zone], {}), "worker_id", 9000) + inx) + gwv6 : lookup(try(var.nodes[zone], {}), "gw6", "fe80::1") } ] ]) : k.name => k } } -resource "null_resource" "worker_machineconfig" { - for_each = { for k, v in var.instances : k => v if lookup(try(var.instances[k], {}), "worker_count", 0) > 0 } - connection { - type = "ssh" - user = "root" - host = "${each.key}.${var.proxmox_domain}" - } +resource "proxmox_virtual_environment_file" 
"worker_machineconfig" { + for_each = local.workers + node_name = each.value.node_name + content_type = "snippets" + datastore_id = "local" - provisioner "file" { - content = templatefile("${path.module}/templates/worker.yaml.tpl", - merge(var.kubernetes, try(var.instances["all"], {}), { - lbv4 = local.ipv4_vip - nodeSubnets = var.vpc_main_cidr - clusterDns = cidrhost(split(",", var.kubernetes["serviceSubnets"])[0], 10) - labels = local.worker_labels + source_raw { + data = templatefile("${path.module}/templates/${lookup(var.instances[each.value.zone], "worker_template", "worker.yaml.tpl")}", + merge(local.kubernetes, try(var.instances["all"], {}), { + labels = join(",", [local.web_labels, lookup(var.instances[each.value.zone], "worker_labels", "")]) + nodeSubnets = [local.subnets[each.value.zone], var.vpc_main_cidr[1]] + lbv4 = local.lbv4 + ipv4 = each.value.ipv4 + gwv4 = each.value.gwv4 + hvv4 = each.value.hvv4 + ipv6 = "${each.value.ipv6}/64" + gwv6 = each.value.gwv6 + kernelArgs = [] })) - destination = "/var/lib/vz/snippets/${local.worker_prefix}.yaml" - } - - triggers = { - params = filemd5("${path.module}/templates/worker.yaml.tpl") + file_name = "${each.value.name}.yaml" } } -resource "null_resource" "worker_metadata" { - for_each = local.workers - connection { - type = "ssh" - user = "root" - host = "${each.value.node_name}.${var.proxmox_domain}" - } +resource "proxmox_virtual_environment_file" "worker_metadata" { + for_each = local.workers + node_name = each.value.node_name + content_type = "snippets" + datastore_id = "local" - provisioner "file" { - content = templatefile("${path.module}/templates/metadata.yaml", { + source_raw { + data = templatefile("${path.module}/templates/metadata.yaml", { hostname : each.value.name, id : each.value.id, providerID : "proxmox://${var.region}/${each.value.id}", @@ -61,154 +68,159 @@ resource "null_resource" "worker_metadata" { zone : each.value.zone, region : var.region, }) - destination = "/var/lib/vz/snippets/${each.value.name}.metadata.yaml" - } - - triggers = { - params = join(",", [for k, v in local.workers[each.key] : "${k}-${v}"]) + file_name = "${each.value.name}.metadata.yaml" } } -# resource "proxmox_virtual_environment_vm" "talos" { -# for_each = local.workers -# name = each.value.name -# tags = ["talos"] - -# node_name = each.value.node_name -# vm_id = each.value.id - -# initialization { -# datastore_id = "local" -# ip_config { -# ipv6 { -# address = "slaac" -# # gateway = "" -# } -# } -# ip_config { -# ipv4 { -# address = "2.3.4.5/24" -# } -# } -# user_data_file_id = "" -# } -# clone { -# vm_id = 102 -# datastore_id = var.proxmox_storage -# } -# disk { -# datastore_id = var.proxmox_storage -# interface = "scsi0" -# ssd = true -# size = 32 -# file_format = "raw" -# } -# cpu { -# cores = each.value.cpu -# sockets = 1 -# type = "host" -# flags = ["+aes"] -# } -# memory { -# dedicated = each.value.mem -# } - -# network_device { -# model = "virtio" -# bridge = "vmbr0" -# # firewall = true -# } -# network_device { -# model = "virtio" -# bridge = "vmbr1" -# } - -# operating_system { -# type = "l26" -# } -# agent { -# enabled = false -# } - -# serial_device {} -# lifecycle { -# ignore_changes = [ -# tags, -# cpu, -# memory, -# network_device, -# ] -# } - -# depends_on = [null_resource.worker_machineconfig, null_resource.worker_metadata] -# } - -resource "proxmox_vm_qemu" "worker" { +resource "proxmox_virtual_environment_vm" "worker" { for_each = local.workers name = each.value.name - vmid = each.value.id - target_node = 
each.value.node_name - clone = var.proxmox_image + node_name = each.value.node_name + vm_id = each.value.id + description = "Talos worker node" - agent = 0 - define_connection_info = false - os_type = "ubuntu" - qemu_os = "l26" - # ipconfig0 = each.value.ip0 - ipconfig0 = "ip=${each.value.ipv4},gw=${each.value.gwv4}" - cicustom = "user=local:snippets/${local.worker_prefix}.yaml,meta=local:snippets/${each.value.name}.metadata.yaml" - cloudinit_cdrom_storage = var.proxmox_storage - - onboot = false - cpu = "host,flags=+aes" - sockets = 1 - cores = each.value.cpu - memory = each.value.mem - numa = true - scsihw = "virtio-scsi-single" - - vga { - memory = 0 - type = "serial0" - } - serial { - id = 0 - type = "socket" + startup { + order = 7 + up_delay = 15 } - network { - model = "virtio" - bridge = "vmbr0" - firewall = true + machine = "pc" + cpu { + architecture = "x86_64" + cores = each.value.cpu + affinity = each.value.cpus + sockets = 1 + numa = true + type = "host" } - network { - model = "virtio" - bridge = "vmbr1" + memory { + dedicated = each.value.mem + # hugepages = "1024" + # keep_hugepages = true + } + dynamic "numa" { + for_each = { for idx, numa in each.value.numas : numa => { + device = "numa${idx}" + cpus = "0-${each.value.cpu - 1}" + mem = each.value.mem + } } + content { + device = numa.value.device + cpus = numa.value.cpus + hostnodes = numa.key + memory = numa.value.mem + policy = "bind" + } } - boot = "order=scsi0" + scsi_hardware = "virtio-scsi-single" disk { - type = "scsi" - storage = var.proxmox_storage - size = "32G" - cache = "writethrough" - ssd = 1 - backup = false + datastore_id = lookup(try(var.nodes[each.value.zone], {}), "storage", "local") + interface = "scsi0" + iothread = true + ssd = true + cache = "none" + size = 32 + file_format = "raw" + } + clone { + vm_id = proxmox_virtual_environment_vm.template[each.value.zone].id + } + + initialization { + dns { + servers = [each.value.gwv4, "2001:4860:4860::8888"] + } + ip_config { + ipv6 { + address = "${each.value.ipv6}/64" + gateway = each.value.gwv6 + } + } + ip_config { + ipv4 { + address = "${each.value.ipv4}/24" + gateway = each.value.hvv4 + } + ipv6 { + address = "${each.value.ipv6ula}/64" + } + } + + datastore_id = "local" + meta_data_file_id = proxmox_virtual_environment_file.worker_metadata[each.key].id + user_data_file_id = proxmox_virtual_environment_file.worker_machineconfig[each.key].id + } + + network_device { + bridge = "vmbr0" + queues = each.value.cpu + mtu = 1500 + mac_address = "32:90:${join(":", formatlist("%02X", split(".", each.value.ipv4)))}" + firewall = true + } + network_device { + bridge = "vmbr1" + queues = each.value.cpu + mtu = 1400 + firewall = false + } + + operating_system { + type = "l26" + } + + serial_device {} + vga { + type = "serial0" } lifecycle { ignore_changes = [ - boot, + started, + clone, + ipv4_addresses, + ipv6_addresses, + network_interface_names, + initialization, disk, - network, - desc, - numa, - agent, - ipconfig0, - ipconfig1, - define_connection_info, + # memory, + # numa, ] } - depends_on = [null_resource.worker_machineconfig, null_resource.worker_metadata] + tags = [local.kubernetes["clusterName"]] + depends_on = [proxmox_virtual_environment_file.worker_machineconfig] +} + +resource "proxmox_virtual_environment_firewall_options" "worker" { + for_each = local.workers + node_name = each.value.node_name + vm_id = each.value.id + enabled = true + + dhcp = false + ipfilter = false + log_level_in = "nolog" + log_level_out = "nolog" + macfilter = false + ndp = 
true + input_policy = "DROP" + output_policy = "ACCEPT" + radv = false + + depends_on = [proxmox_virtual_environment_vm.worker] +} + +resource "proxmox_virtual_environment_firewall_rules" "worker" { + for_each = { for k, v in local.workers : k => v if lookup(try(var.instances[v.zone], {}), "worker_sg", "") != "" } + node_name = each.value.node_name + vm_id = each.value.id + + rule { + enabled = true + security_group = lookup(var.instances[each.value.zone], "worker_sg") + } + + depends_on = [proxmox_virtual_environment_vm.worker, proxmox_virtual_environment_firewall_options.worker] } diff --git a/proxmox/network-lb.tf b/proxmox/network-lb.tf deleted file mode 100644 index 3fcb73b..0000000 --- a/proxmox/network-lb.tf +++ /dev/null @@ -1,5 +0,0 @@ - -locals { - gwv4 = cidrhost(var.vpc_main_cidr, 1) - ipv4_vip = cidrhost(var.vpc_main_cidr, 10) -} diff --git a/proxmox/network.tf b/proxmox/network.tf index e3bb67c..6465dd0 100644 --- a/proxmox/network.tf +++ b/proxmox/network.tf @@ -1,7 +1,8 @@ locals { - zones = [for k, v in var.instances : k if k != "all"] + zones = [for k, v in var.instances : k] + subnets = { for inx, zone in local.zones : zone => cidrsubnet(var.vpc_main_cidr[0], 4, var.network_shift + inx - 1) if zone != "all" } - controlplane_subnet = cidrsubnet(var.vpc_main_cidr, 5, var.network_shift) - subnets = { for inx, zone in local.zones : zone => cidrsubnet(var.vpc_main_cidr, 5, var.network_shift + inx + 1) } + gwv4 = cidrhost(var.vpc_main_cidr[0], -3) + lbv4 = cidrhost(var.vpc_main_cidr[0], 10) } diff --git a/proxmox/outputs.tf b/proxmox/outputs.tf index 1984412..1c8ccb0 100644 --- a/proxmox/outputs.tf +++ b/proxmox/outputs.tf @@ -1,30 +1,24 @@ output "controlplane_endpoint" { description = "Kubernetes controlplane endpoint" - value = local.ipv4_vip + value = try(one(local.controlplane_v6), "") +} + +output "controlplane_endpoints" { + description = "Kubernetes controlplane endpoints" + value = try(local.controlplane_v4, []) } output "controlplane_firstnode" { description = "Kubernetes controlplane first node" - value = try(flatten([for s in local.controlplanes : split("/", s.ipv4)[0]])[0], "127.0.0.1") + value = try(flatten([for s in local.controlplanes : [s.ipv6, s.ipv4]])[0], "127.0.0.1") } -output "controlplane_apply" { - description = "Kubernetes controlplane apply command" - value = [for cp in local.controlplanes : - "talosctl apply-config --insecure --nodes ${split("/", cp.ipv4)[0]} --config-patch @_cfgs/${cp.name}.yaml --file _cfgs/controlplane.yaml" - ] - depends_on = [proxmox_vm_qemu.controlplane] +output "controlplane_lbv4" { + description = "Kubernetes controlplane loadbalancer" + value = try(local.lbv4, "") } -output "controlplane_nodes" { - description = "Kubernetes controlplane nodes" - value = [ - for s in local.controlplanes : - { - name = s.name - ipv4_address = split("/", s.ipv4)[0] - zone = s.zone - } - ] +output "subnets" { + value = local.subnets } diff --git a/proxmox/templates/controlplane.yaml.tpl b/proxmox/templates/controlplane.yaml.tpl index 2e2c6f7..2428258 100644 --- a/proxmox/templates/controlplane.yaml.tpl +++ b/proxmox/templates/controlplane.yaml.tpl @@ -1,18 +1,19 @@ machine: kubelet: + image: ghcr.io/siderolabs/kubelet:${version} extraArgs: rotate-server-certificates: true clusterDNS: - 169.254.2.53 - ${cidrhost(split(",",serviceSubnets)[0], 10)} nodeIP: - validSubnets: ${format("%#v",split(",",nodeSubnets))} + validSubnets: ${format("%#v",nodeSubnets)} network: - hostname: "${name}" + hostname: ${name} interfaces: - - interface: eth0 + - 
interface: eth1 vip: - ip: ${ipv4_vip} + ip: ${lbv4} - interface: dummy0 addresses: - 169.254.2.53/32 @@ -57,20 +58,31 @@ cluster: podSubnets: ${format("%#v",split(",",podSubnets))} serviceSubnets: ${format("%#v",split(",",serviceSubnets))} cni: - name: custom - urls: - - https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/_deployments/vars/cilium-result.yaml + name: none proxy: disabled: true + apiServer: + image: registry.k8s.io/kube-apiserver:${version} + resources: + requests: + cpu: 500m + memory: 1Gi + certSANs: + - ${apiDomain} controllerManager: + image: registry.k8s.io/kube-controller-manager:${version} extraArgs: node-cidr-mask-size-ipv4: 24 node-cidr-mask-size-ipv6: 112 + scheduler: + image: registry.k8s.io/kube-scheduler:${version} etcd: advertisedSubnets: - - ${nodeSubnets} + - ${nodeSubnets[0]} listenSubnets: - - ${nodeSubnets} + - ${nodeSubnets[0]} + externalCloudProvider: + enabled: true inlineManifests: - name: proxmox-cloud-controller-manager contents: |- @@ -82,15 +94,3 @@ cluster: namespace: kube-system data: config.yaml: ${base64encode(clusters)} - externalCloudProvider: - enabled: true - manifests: - - https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/_deployments/vars/talos-cloud-controller-manager-result.yaml - - https://raw.githubusercontent.com/sergelogvinov/proxmox-cloud-controller-manager/main/docs/deploy/cloud-controller-manager-talos.yml - - https://raw.githubusercontent.com/sergelogvinov/proxmox-csi-plugin/main/docs/deploy/proxmox-csi-plugin-talos.yml - - https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/_deployments/vars/metrics-server-result.yaml - - https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/_deployments/vars/local-path-storage-ns.yaml - - https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/_deployments/vars/local-path-storage-result.yaml - - https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/_deployments/vars/coredns-local.yaml - - https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/_deployments/vars/ingress-ns.yaml - - https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/_deployments/vars/ingress-result.yaml diff --git a/proxmox/templates/web.yaml.tpl b/proxmox/templates/web.yaml.tpl index 6e337da..5a1ea91 100644 --- a/proxmox/templates/web.yaml.tpl +++ b/proxmox/templates/web.yaml.tpl @@ -17,7 +17,7 @@ machine: - 169.254.2.53 - ${clusterDns} nodeIP: - validSubnets: ${format("%#v",split(",",nodeSubnets))} + validSubnets: ${format("%#v",nodeSubnets)} network: interfaces: - interface: dummy0 @@ -27,24 +27,21 @@ machine: - ip: ${lbv4} aliases: - ${apiDomain} - nameservers: - - 2606:4700:4700::1111 - - 1.1.1.1 - - 2001:4860:4860::8888 - time: - servers: - - 2.europe.pool.ntp.org - - time.cloudflare.com sysctls: net.core.somaxconn: 65535 net.core.netdev_max_backlog: 4096 net.ipv4.tcp_keepalive_intvl: 60 net.ipv4.tcp_keepalive_time: 600 + net.ipv4.tcp_fin_timeout: 10 + net.ipv4.tcp_tw_reuse: 1 vm.max_map_count: 128000 install: wipe: true extraKernelArgs: - talos.dashboard.disabled=1 +%{ for arg in kernelArgs ~} + - ${arg} +%{ endfor ~} systemDiskEncryption: state: provider: luks2 @@ -73,12 +70,13 @@ cluster: endpoint: https://${apiDomain}:6443 clusterName: ${clusterName} discovery: - enabled: true + enabled: false network: dnsDomain: ${domain} + podSubnets: ${format("%#v",split(",",podSubnets))} serviceSubnets: ${format("%#v",split(",",serviceSubnets))} proxy: - disabled: false + disabled: true token: ${token} ca: 
crt: ${ca} diff --git a/proxmox/templates/worker.patch.yaml.tpl b/proxmox/templates/worker.patch.yaml.tpl deleted file mode 100644 index eae21bc..0000000 --- a/proxmox/templates/worker.patch.yaml.tpl +++ /dev/null @@ -1,45 +0,0 @@ -machine: - kubelet: - extraArgs: - cloud-provider: external - rotate-server-certificates: true - node-labels: "project.io/node-pool=worker" - clusterDNS: - - 169.254.2.53 - - ${cidrhost(split(",",serviceSubnets)[0], 10)} - nodeIP: - validSubnets: ${format("%#v",split(",",nodeSubnets))} - network: - interfaces: - - interface: dummy0 - addresses: - - 169.254.2.53/32 - extraHostEntries: - - ip: ${lbv4} - aliases: - - ${apiDomain} - sysctls: - net.core.somaxconn: 65535 - net.core.netdev_max_backlog: 4096 - systemDiskEncryption: - state: - provider: luks2 - options: - - no_read_workqueue - - no_write_workqueue - keys: - - nodeID: {} - slot: 0 - ephemeral: - provider: luks2 - options: - - no_read_workqueue - - no_write_workqueue - keys: - - nodeID: {} - slot: 0 -cluster: - controlPlane: - endpoint: https://${apiDomain}:6443 - proxy: - disabled: true diff --git a/proxmox/templates/worker.yaml.tpl b/proxmox/templates/worker.yaml.tpl index 6e337da..029c162 100644 --- a/proxmox/templates/worker.yaml.tpl +++ b/proxmox/templates/worker.yaml.tpl @@ -13,11 +13,19 @@ machine: cloud-provider: external rotate-server-certificates: true node-labels: ${labels} + extraConfig: + imageGCHighThresholdPercent: 70 + imageGCLowThresholdPercent: 50 + shutdownGracePeriod: 60s + topologyManagerPolicy: best-effort + topologyManagerScope: container + cpuManagerPolicy: static + allowedUnsafeSysctls: [net.core.somaxconn] clusterDNS: - 169.254.2.53 - - ${clusterDns} + - ${cidrhost(split(",",serviceSubnets)[0], 10)} nodeIP: - validSubnets: ${format("%#v",split(",",nodeSubnets))} + validSubnets: ${format("%#v",nodeSubnets)} network: interfaces: - interface: dummy0 @@ -27,24 +35,21 @@ machine: - ip: ${lbv4} aliases: - ${apiDomain} - nameservers: - - 2606:4700:4700::1111 - - 1.1.1.1 - - 2001:4860:4860::8888 - time: - servers: - - 2.europe.pool.ntp.org - - time.cloudflare.com sysctls: net.core.somaxconn: 65535 net.core.netdev_max_backlog: 4096 net.ipv4.tcp_keepalive_intvl: 60 net.ipv4.tcp_keepalive_time: 600 + net.ipv4.tcp_fin_timeout: 10 + net.ipv4.tcp_tw_reuse: 1 vm.max_map_count: 128000 install: wipe: true extraKernelArgs: - talos.dashboard.disabled=1 +%{ for arg in kernelArgs ~} + - ${arg} +%{ endfor ~} systemDiskEncryption: state: provider: luks2 @@ -73,12 +78,13 @@ cluster: endpoint: https://${apiDomain}:6443 clusterName: ${clusterName} discovery: - enabled: true + enabled: false network: dnsDomain: ${domain} + podSubnets: ${format("%#v",split(",",podSubnets))} serviceSubnets: ${format("%#v",split(",",serviceSubnets))} proxy: - disabled: false + disabled: true token: ${token} ca: crt: ${ca} diff --git a/proxmox/variables.tf b/proxmox/variables.tf index f65ff74..34c1ba1 100644 --- a/proxmox/variables.tf +++ b/proxmox/variables.tf @@ -1,19 +1,14 @@ -variable "proxmox_domain" { - description = "Proxmox host" - type = string - default = "example.com" -} - variable "proxmox_host" { description = "Proxmox host" type = string default = "192.168.1.1" } -variable "proxmox_nodename" { - description = "Proxmox node name" +variable "proxmox_domain" { + description = "Proxmox domain name" type = string + default = "proxmox.local" } variable "proxmox_image" { @@ -22,73 +17,78 @@ variable "proxmox_image" { default = "talos" } -variable "proxmox_storage" { - description = "Proxmox storage name" - type = string 
-} - -variable "proxmox_token_id" { - description = "Proxmox token id" - type = string -} - -variable "proxmox_token_secret" { - description = "Proxmox token secret" - type = string -} - variable "region" { description = "Proxmox Cluster Name" type = string - default = "cluster-1" -} - -variable "kubernetes" { - type = map(string) - default = { - podSubnets = "10.32.0.0/12,fd40:10:32::/102" - serviceSubnets = "10.200.0.0/22,fd40:10:200::/112" - domain = "cluster.local" - apiDomain = "api.cluster.local" - clusterName = "talos-k8s-proxmox" - tokenMachine = "" - caMachine = "" - token = "" - ca = "" - } - sensitive = true + default = "region-1" } variable "network_shift" { description = "Network number shift" type = number - default = 6 + default = 8 } variable "vpc_main_cidr" { description = "Local proxmox subnet" + type = list(string) + default = ["172.16.0.0/24", "fd60:172:16::/64"] +} + +variable "release" { type = string - default = "192.168.0.0/24" + description = "The version of the Talos image" + default = "1.7.4" +} + +data "sops_file" "tfvars" { + source_file = "terraform.tfvars.sops.json" +} + +data "terraform_remote_state" "init" { + backend = "local" + config = { + path = "${path.module}/init/terraform.tfstate" + } +} + +locals { + kubernetes = jsondecode(data.sops_file.tfvars.raw)["kubernetes"] + + proxmox_token = data.terraform_remote_state.init.outputs.ccm +} + +variable "nodes" { + description = "Proxmox nodes properties" + type = map(any) + default = { + "hvm-1" = { + storage = "data", + cpu = ["0-3,16-19", "4-7,20-23", "8-11,24-27", "12-15,28-31"], + ip4 = "1.1.0.1" + ip6 = "2001:1:2:1::/64", + gw6 = "2001:1:2:1::64", + }, + "hvm-2" = { + storage = "data", + cpu = ["0-3,16-19", "4-7,20-23", "8-11,24-27", "12-15,28-31"], + ip4 = "1.1.0.2" + ip6 = "2001:1:2:2::/64", + gw6 = "2001:1:2:2::64", + }, + } } variable "controlplane" { description = "Property of controlplane" type = map(any) default = { - "node1" = { - id = 500 + "hvm-1" = { + id = 10010 count = 0, - cpu = 2, - mem = 4096, - # ip0 = "ip6=1:2::3/64,gw6=1:2::1" + cpu = 4, + mem = 6144, }, - "node2" = { - id = 510 - count = 0, - cpu = 2, - mem = 4096, - # ip0 = "ip6=dhcp", - } } } @@ -97,39 +97,64 @@ variable "instances" { type = map(any) default = { "all" = { - version = "v1.28.2" + version = "v1.30.2" }, - "node1" = { - web_id = 1000 - web_count = 0, - web_cpu = 2, - web_mem = 4096, - web_ip0 = "", # ip=dhcp,ip6=dhcp - worker_id = 1050 - worker_count = 0, - worker_cpu = 2, - worker_mem = 4096, - worker_ip0 = "", # ip=dhcp,ip6=dhcp + "hvm-1" = { + enabled = false, + web_id = 11020, + web_count = 0, + web_cpu = 8, + web_mem = 27648, + web_template = "worker-sriov.yaml.tpl" + web_labels = "" + web_sg = "kubernetes" + worker_id = 11030, + worker_count = 0, + worker_cpu = 8, + worker_mem = 28672, + worker_template = "worker-sriov.yaml.tpl" + worker_sg = "kubernetes" + db_id = 11030 + db_count = 0, + db_cpu = 8, + db_mem = 28672, + db_template = "worker-sriov.yaml.tpl" + db_labels = "" + db_sg = "kubernetes" + }, + "hvm-2" = { + enabled = false, + web_id = 12020, + web_count = 0, + web_cpu = 8, + web_mem = 27648, + web_template = "worker-sriov.yaml.tpl" + web_labels = "" + web_sg = "kubernetes" + worker_id = 12030, + worker_count = 0, + worker_cpu = 8, + worker_mem = 28672, + worker_template = "worker-sriov.yaml.tpl" + worker_sg = "kubernetes" + db_id = 12040 + db_count = 0, + db_cpu = 8, + db_mem = 28672, + db_template = "worker-sriov.yaml.tpl" + db_labels = "" + db_sg = "kubernetes" }, - "node2" = { - web_id = 2000 - 
web_count = 0, - web_cpu = 2, - web_mem = 4096, - worker_id = 2050 - worker_count = 0, - worker_cpu = 2, - worker_mem = 4096, - } - "node3" = { - web_id = 3000 - web_count = 0, - web_cpu = 2, - web_mem = 4096, - worker_id = 3050 - worker_count = 0, - worker_cpu = 2, - worker_mem = 4096, - } + } +} + +variable "security_groups" { + description = "Map of security groups" + type = map(any) + default = { + "controlplane" = "kubernetes" + "web" = "kubernetes" + "worker" = "kubernetes" + "db" = "kubernetes" } } diff --git a/proxmox/vars/proxmox-ccm.yaml b/proxmox/vars/proxmox-ccm.yaml new file mode 100644 index 0000000..fbc9ecd --- /dev/null +++ b/proxmox/vars/proxmox-ccm.yaml @@ -0,0 +1,15 @@ + +existingConfigSecret: proxmox-cloud-controller-manager + +affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/control-plane + operator: Exists + +logVerbosityLevel: 3 + +enabledControllers: + - cloud-node-lifecycle diff --git a/proxmox/vars/proxmox-csi.yaml b/proxmox/vars/proxmox-csi.yaml new file mode 100644 index 0000000..64c26cb --- /dev/null +++ b/proxmox/vars/proxmox-csi.yaml @@ -0,0 +1,23 @@ + +storageClass: + - name: proxmox + storage: zfs + cache: none + ssd: true + fstype: ext4 + reclaimPolicy: Delete + +replicaCount: 1 + +nodeSelector: + node-role.kubernetes.io/control-plane: "" + node.cloudprovider.kubernetes.io/platform: nocloud +tolerations: + - key: node-role.kubernetes.io/control-plane + effect: NoSchedule + +node: + nodeSelector: + node.cloudprovider.kubernetes.io/platform: nocloud + tolerations: + - operator: Exists diff --git a/proxmox/vars/proxmox-ns.yaml b/proxmox/vars/proxmox-ns.yaml new file mode 100644 index 0000000..bbc67cd --- /dev/null +++ b/proxmox/vars/proxmox-ns.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: csi-proxmox + labels: + pod-security.kubernetes.io/enforce: privileged + pod-security.kubernetes.io/audit: baseline + pod-security.kubernetes.io/warn: baseline diff --git a/proxmox/vars/secrets.proxmox.yaml b/proxmox/vars/secrets.proxmox.yaml new file mode 100644 index 0000000..e437ac1 --- /dev/null +++ b/proxmox/vars/secrets.proxmox.yaml @@ -0,0 +1,7 @@ +config: + clusters: + - region: region-1 + url: https://172.16.0.128:8006/api2/json + insecure: true + token_id: kubernetes@pve!csi + token_secret: f6ead34e-11c0-4c4d-b8f3-7ae99b526ac0 diff --git a/proxmox/versions.tf b/proxmox/versions.tf index cdf21f3..b602df0 100644 --- a/proxmox/versions.tf +++ b/proxmox/versions.tf @@ -1,13 +1,13 @@ terraform { required_providers { proxmox = { - source = "Telmate/proxmox" - version = "~> 2.9.14" + source = "bpg/proxmox" + version = "0.60.0" + } + sops = { + source = "carlpett/sops" + version = "1.0.0" } - # proxmox = { - # source = "bpg/proxmox" - # version = "~> 0.35.1" - # } } required_version = ">= 1.0" }
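
The root module no longer consumes a token id/secret pair directly: `local.proxmox_token` comes from the init module's `ccm` token output in the `kubernetes@pve!ccm=<uuid>` form (which is why it is split on `=` when the CCM config is rendered), and `local.kubernetes` is read from the sops-encrypted `terraform.tfvars.sops.json`, whose `kubernetes` map is expected to carry the same keys as the removed variable default (podSubnets, serviceSubnets, domain, apiDomain, clusterName, tokenMachine, caMachine, token, ca). The provider configuration itself is outside these hunks; the snippet below is only a minimal, assumed sketch of how the bpg/proxmox and carlpett/sops providers could consume those values, with the endpoint derived from `var.proxmox_host` as a placeholder.

# Assumed wiring only; the actual provider configuration may differ.
provider "sops" {} # carlpett/sops needs no provider settings

provider "proxmox" {
  endpoint = "https://${var.proxmox_host}:8006/" # placeholder host
  insecure = true                                # self-signed PVE certificate assumed

  # The init module returns the full "kubernetes@pve!ccm=<uuid>" string,
  # which bpg/proxmox accepts directly as an API token.
  api_token = local.proxmox_token
}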
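
The per-VM firewall rules only attach security groups by name (the `security_groups` map and the `*_sg` instance keys default to "kubernetes"); the group itself has to exist at the Proxmox cluster level before `terraform apply`. Below is an assumed sketch of creating it with the same provider; the rule set (Kubernetes API on 6443/tcp, Talos apid on 50000/tcp) is an illustration, not part of this patch.

resource "proxmox_virtual_environment_cluster_firewall_security_group" "kubernetes" {
  name    = "kubernetes"
  comment = "Rules shared by the Talos nodes"

  rule {
    type    = "in"
    action  = "ACCEPT"
    comment = "Kubernetes API"
    dport   = "6443"
    proto   = "tcp"
  }

  rule {
    type    = "in"
    action  = "ACCEPT"
    comment = "Talos apid"
    dport   = "50000"
    proto   = "tcp"
  }
}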
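
All pool counts in the variable defaults are zero, so no VMs are provisioned until `nodes`, `controlplane` and `instances` are overridden. The `terraform.tfvars` below is an assumed example that brings up one control plane and two web nodes on a single `hvm-1` node; the storage name, CPU pinning ranges, addresses and security group name are placeholders that must match the actual cluster.

nodes = {
  "hvm-1" = {
    storage = "data"
    cpu     = ["0-3,16-19", "4-7,20-23", "8-11,24-27", "12-15,28-31"]
    ip4     = "172.16.0.1"
    ip6     = "2001:db8:1:1::/64"
    gw6     = "2001:db8:1:1::1"
  }
}

controlplane = {
  "hvm-1" = {
    id    = 10010
    count = 1
    cpu   = 4
    mem   = 6144
  }
}

instances = {
  "all" = {
    version = "v1.30.2"
  }
  "hvm-1" = {
    web_id    = 11020
    web_count = 2
    web_cpu   = 4
    web_mem   = 8192
    web_sg    = "kubernetes"
  }
}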