From 482239ddbc177832ec47395a1939e1db066a14fb Mon Sep 17 00:00:00 2001 From: Serge Logvinov Date: Sun, 23 Oct 2022 11:13:24 +0300 Subject: [PATCH] add deployment --- exoscale/common.tf | 5 +- exoscale/deployments/cilium-result.yaml | 993 +++++++++++++++++++++++ exoscale/deployments/cilium.yaml | 66 ++ exoscale/instances-controlplane.tf | 10 + exoscale/prepare/network-secgroup.tf | 10 + exoscale/talos.tf | 30 + exoscale/templates/controlplane.yaml.tpl | 4 + exoscale/variables.tf | 3 +- 8 files changed, 1117 insertions(+), 4 deletions(-) create mode 100644 exoscale/deployments/cilium-result.yaml create mode 100644 exoscale/deployments/cilium.yaml diff --git a/exoscale/common.tf b/exoscale/common.tf index fc5ba12..d293f08 100644 --- a/exoscale/common.tf +++ b/exoscale/common.tf @@ -1,8 +1,9 @@ data "exoscale_compute_template" "debian" { - for_each = { for idx, name in local.regions : name => idx } + for_each = { for idx, name in local.regions : name => idx if try(var.controlplane[name].count, 0) > 0 } zone = each.key - name = "Linux Debian 11 (Bullseye) 64-bit" + name = "talos" + filter = "mine" } resource "exoscale_ssh_key" "terraform" { diff --git a/exoscale/deployments/cilium-result.yaml b/exoscale/deployments/cilium-result.yaml new file mode 100644 index 0000000..2649271 --- /dev/null +++ b/exoscale/deployments/cilium-result.yaml @@ -0,0 +1,993 @@ +--- +# Source: cilium/templates/cilium-agent/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: "cilium" + namespace: kube-system +--- +# Source: cilium/templates/cilium-operator/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: "cilium-operator" + namespace: kube-system +--- +# Source: cilium/templates/cilium-configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: cilium-config + namespace: kube-system +data: + + # Identity allocation mode selects how identities are shared between cilium + # nodes by setting how they are stored. The options are "crd" or "kvstore". + # - "crd" stores identities in kubernetes as CRDs (custom resource definition). + # These can be queried with: + # kubectl get ciliumid + # - "kvstore" stores identities in an etcd kvstore, that is + # configured below. Cilium versions before 1.6 supported only the kvstore + # backend. Upgrades from these older cilium versions should continue using + # the kvstore by commenting out the identity-allocation-mode below, or + # setting it to "kvstore". + identity-allocation-mode: crd + cilium-endpoint-gc-interval: "5m0s" + nodes-gc-interval: "5m0s" + # Disable the usage of CiliumEndpoint CRD + disable-endpoint-crd: "false" + + # If you want to run cilium in debug mode change this value to true + debug: "false" + # The agent can be put into the following three policy enforcement modes + # default, always and never. + # https://docs.cilium.io/en/latest/policy/intro/#policy-enforcement-modes + enable-policy: "default" + # If you want metrics enabled in all of your Cilium agents, set the port for + # which the Cilium agents will have their metrics exposed. + # This option deprecates the "prometheus-serve-addr" in the + # "cilium-metrics-config" ConfigMap + # NOTE that this will open the port on ALL nodes where Cilium pods are + # scheduled. + prometheus-serve-addr: ":9962" + # Port to expose Envoy metrics (e.g. "9964"). Envoy metrics listener will be disabled if this + # field is not set. + proxy-prometheus-port: "9964" + + # Enable IPv4 addressing. If enabled, all endpoints are allocated an IPv4 + # address. 
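+ # Dual-stack note: with enable-ipv4 and enable-ipv6 both "true", the
+ # kubernetes IPAM mode needs pod CIDRs for each family (enforced by
+ # k8s-require-ipv4-pod-cidr / k8s-require-ipv6-pod-cidr below); this
+ # repo provides them via variables.tf:
+ #   podSubnets     = "10.32.0.0/12,fd40:10:32::/102"
+ #   serviceSubnets = "10.200.0.0/22,fd40:10:200::/112"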
+ enable-ipv4: "true" + + # Enable IPv6 addressing. If enabled, all endpoints are allocated an IPv6 + # address. + enable-ipv6: "true" + # Users who wish to specify their own custom CNI configuration file must set + # custom-cni-conf to "true", otherwise Cilium may overwrite the configuration. + custom-cni-conf: "false" + enable-bpf-clock-probe: "true" + # If you want cilium monitor to aggregate tracing for packets, set this level + # to "low", "medium", or "maximum". The higher the level, the less packets + # that will be seen in monitor output. + monitor-aggregation: medium + + # The monitor aggregation interval governs the typical time between monitor + # notification events for each allowed connection. + # + # Only effective when monitor aggregation is set to "medium" or higher. + monitor-aggregation-interval: 5s + + # The monitor aggregation flags determine which TCP flags which, upon the + # first observation, cause monitor notifications to be generated. + # + # Only effective when monitor aggregation is set to "medium" or higher. + monitor-aggregation-flags: all + # Specifies the ratio (0.0-1.0) of total system memory to use for dynamic + # sizing of the TCP CT, non-TCP CT, NAT and policy BPF maps. + bpf-map-dynamic-size-ratio: "0.0025" + # bpf-policy-map-max specifies the maximum number of entries in endpoint + # policy map (per endpoint) + bpf-policy-map-max: "16384" + # bpf-lb-map-max specifies the maximum number of entries in bpf lb service, + # backend and affinity maps. + bpf-lb-map-max: "65536" + # bpf-lb-bypass-fib-lookup instructs Cilium to enable the FIB lookup bypass + # optimization for nodeport reverse NAT handling. + bpf-lb-external-clusterip: "false" + + # Pre-allocation of map entries allows per-packet latency to be reduced, at + # the expense of up-front memory allocation for the entries in the maps. The + # default value below will minimize memory usage in the default installation; + # users who are sensitive to latency may consider setting this to "true". + # + # This option was introduced in Cilium 1.4. Cilium 1.3 and earlier ignore + # this option and behave as though it is set to "true". + # + # If this value is modified, then during the next Cilium startup the restore + # of existing endpoints and tracking of ongoing connections may be disrupted. + # As a result, reply packets may be dropped and the load-balancing decisions + # for established connections may change. + # + # If this option is set to "false" during an upgrade from 1.3 or earlier to + # 1.4 or later, then it may cause one-time disruptions during the upgrade. + preallocate-bpf-maps: "false" + + # Regular expression matching compatible Istio sidecar istio-proxy + # container image names + sidecar-istio-proxy-image: "cilium/istio_proxy" + + # Name of the cluster. Only relevant when building a mesh of clusters. + cluster-name: default + # Unique ID of the cluster. Must be unique across all conneted clusters and + # in the range of 1 and 255. Only relevant when building a mesh of clusters. 
+ cluster-id: "0" + + # Encapsulation mode for communication between nodes + # Possible values: + # - disabled + # - vxlan (default) + # - geneve + tunnel: "vxlan" + # Enables L7 proxy for L7 policy enforcement and visibility + enable-l7-proxy: "true" + + enable-ipv4-masquerade: "true" + enable-ipv6-masquerade: "true" + enable-bpf-masquerade: "false" + + enable-xt-socket-fallback: "true" + install-iptables-rules: "true" + install-no-conntrack-iptables-rules: "false" + + auto-direct-node-routes: "false" + enable-local-redirect-policy: "true" + enable-host-firewall: "true" + # List of devices used to attach bpf_host.o (implements BPF NodePort, + # host-firewall and BPF masquerading) + devices: "eth+" + + kube-proxy-replacement: "strict" + kube-proxy-replacement-healthz-bind-address: "" + bpf-lb-sock: "false" + host-reachable-services-protos: + enable-health-check-nodeport: "true" + node-port-bind-protection: "true" + enable-auto-protect-node-port-range: "true" + enable-svc-source-range-check: "true" + enable-l2-neigh-discovery: "true" + arping-refresh-period: "30s" + k8s-require-ipv4-pod-cidr: "true" + k8s-require-ipv6-pod-cidr: "true" + enable-endpoint-health-checking: "true" + enable-health-checking: "true" + enable-well-known-identities: "false" + enable-remote-node-identity: "true" + synchronize-k8s-nodes: "true" + operator-api-serve-addr: "127.0.0.1:9234" + ipam: "kubernetes" + disable-cnp-status-updates: "true" + enable-vtep: "false" + vtep-endpoint: "" + vtep-cidr: "" + vtep-mask: "" + vtep-mac: "" + enable-k8s-endpoint-slice: "true" + enable-bgp-control-plane: "false" + procfs: "/host/proc" + bpf-root: "/sys/fs/bpf" + cgroup-root: "/sys/fs/cgroup" + enable-k8s-terminating-endpoint: "true" + remove-cilium-node-taints: "true" + set-cilium-is-up-condition: "true" + unmanaged-pod-watcher-interval: "15" + tofqdns-dns-reject-response-code: "refused" + tofqdns-enable-dns-compression: "true" + tofqdns-endpoint-max-ip-per-hostname: "50" + tofqdns-idle-connection-grace-period: "0s" + tofqdns-max-deferred-connection-deletes: "10000" + tofqdns-min-ttl: "3600" + tofqdns-proxy-response-max-delay: "100ms" + agent-not-ready-taint-key: "node.cilium.io/agent-not-ready" +--- +# Source: cilium/templates/cilium-agent/clusterrole.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cilium +rules: +- apiGroups: + - networking.k8s.io + resources: + - networkpolicies + verbs: + - get + - list + - watch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - namespaces + - services + - pods + - endpoints + - nodes + verbs: + - get + - list + - watch +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - list + - watch + # This is used when validating policies in preflight. This will need to stay + # until we figure out how to avoid "get" inside the preflight, and then + # should be removed ideally. 
+ - get +- apiGroups: + - cilium.io + resources: + - ciliumbgploadbalancerippools + - ciliumbgppeeringpolicies + - ciliumclusterwideenvoyconfigs + - ciliumclusterwidenetworkpolicies + - ciliumegressgatewaypolicies + - ciliumegressnatpolicies + - ciliumendpoints + - ciliumendpointslices + - ciliumenvoyconfigs + - ciliumidentities + - ciliumlocalredirectpolicies + - ciliumnetworkpolicies + - ciliumnodes + verbs: + - list + - watch +- apiGroups: + - cilium.io + resources: + - ciliumidentities + - ciliumendpoints + - ciliumnodes + verbs: + - create +- apiGroups: + - cilium.io + # To synchronize garbage collection of such resources + resources: + - ciliumidentities + verbs: + - update +- apiGroups: + - cilium.io + resources: + - ciliumendpoints + verbs: + - delete + - get +- apiGroups: + - cilium.io + resources: + - ciliumnodes + - ciliumnodes/status + verbs: + - get + - update +- apiGroups: + - cilium.io + resources: + - ciliumnetworkpolicies/status + - ciliumclusterwidenetworkpolicies/status + - ciliumendpoints/status + - ciliumendpoints + verbs: + - patch +--- +# Source: cilium/templates/cilium-operator/clusterrole.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cilium-operator +rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch + # to automatically delete [core|kube]dns pods so that are starting to being + # managed by Cilium + - delete +- apiGroups: + - "" + resources: + - nodes + verbs: + - list + - watch +- apiGroups: + - "" + resources: + # To remove node taints + - nodes + # To set NetworkUnavailable false on startup + - nodes/status + verbs: + - patch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + # to perform LB IP allocation for BGP + - services/status + verbs: + - update +- apiGroups: + - "" + resources: + # to check apiserver connectivity + - namespaces + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + # to perform the translation of a CNP that contains `ToGroup` to its endpoints + - services + - endpoints + verbs: + - get + - list + - watch +- apiGroups: + - cilium.io + resources: + - ciliumnetworkpolicies + - ciliumclusterwidenetworkpolicies + verbs: + # Create auto-generated CNPs and CCNPs from Policies that have 'toGroups' + - create + - update + - deletecollection + # To update the status of the CNPs and CCNPs + - patch + - get + - list + - watch +- apiGroups: + - cilium.io + resources: + - ciliumnetworkpolicies/status + - ciliumclusterwidenetworkpolicies/status + verbs: + # Update the auto-generated CNPs and CCNPs status. 
+ - patch + - update +- apiGroups: + - cilium.io + resources: + - ciliumendpoints + - ciliumidentities + verbs: + # To perform garbage collection of such resources + - delete + - list + - watch +- apiGroups: + - cilium.io + resources: + - ciliumidentities + verbs: + # To synchronize garbage collection of such resources + - update +- apiGroups: + - cilium.io + resources: + - ciliumnodes + verbs: + - create + - update + - get + - list + - watch + # To perform CiliumNode garbage collector + - delete +- apiGroups: + - cilium.io + resources: + - ciliumnodes/status + verbs: + - update +- apiGroups: + - cilium.io + resources: + - ciliumendpointslices + - ciliumenvoyconfigs + verbs: + - create + - update + - get + - list + - watch + - delete +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - create + - get + - list + - watch +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - update + resourceNames: + - ciliumbgploadbalancerippools.cilium.io + - ciliumbgppeeringpolicies.cilium.io + - ciliumclusterwideenvoyconfigs.cilium.io + - ciliumclusterwidenetworkpolicies.cilium.io + - ciliumegressgatewaypolicies.cilium.io + - ciliumegressnatpolicies.cilium.io + - ciliumendpoints.cilium.io + - ciliumendpointslices.cilium.io + - ciliumenvoyconfigs.cilium.io + - ciliumexternalworkloads.cilium.io + - ciliumidentities.cilium.io + - ciliumlocalredirectpolicies.cilium.io + - ciliumnetworkpolicies.cilium.io + - ciliumnodes.cilium.io +# For cilium-operator running in HA mode. +# +# Cilium operator running in HA mode requires the use of ResourceLock for Leader Election +# between multiple running instances. +# The preferred way of doing this is to use LeasesResourceLock as edits to Leases are less +# common and fewer objects in the cluster watch "all Leases". +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - update +--- +# Source: cilium/templates/cilium-agent/clusterrolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cilium +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cilium +subjects: +- kind: ServiceAccount + name: "cilium" + namespace: kube-system +--- +# Source: cilium/templates/cilium-operator/clusterrolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cilium-operator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cilium-operator +subjects: +- kind: ServiceAccount + name: "cilium-operator" + namespace: kube-system +--- +# Source: cilium/templates/cilium-agent/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: cilium-agent + namespace: kube-system + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9964" + labels: + k8s-app: cilium +spec: + clusterIP: None + type: ClusterIP + selector: + k8s-app: cilium + ports: + - name: envoy-metrics + port: 9964 + protocol: TCP + targetPort: envoy-metrics +--- +# Source: cilium/templates/cilium-agent/daemonset.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: cilium + namespace: kube-system + labels: + k8s-app: cilium +spec: + selector: + matchLabels: + k8s-app: cilium + updateStrategy: + rollingUpdate: + maxUnavailable: 2 + type: RollingUpdate + template: + metadata: + annotations: + prometheus.io/port: "9962" + prometheus.io/scrape: "true" + # Set app AppArmor's profile to "unconfined". 
The value of this annotation + # can be modified as long users know which profiles they have available + # in AppArmor. + container.apparmor.security.beta.kubernetes.io/cilium-agent: "unconfined" + container.apparmor.security.beta.kubernetes.io/clean-cilium-state: "unconfined" + labels: + k8s-app: cilium + spec: + containers: + - name: cilium-agent + image: "quay.io/cilium/cilium:v1.12.3@sha256:30de50c4dc0a1e1077e9e7917a54d5cab253058b3f779822aec00f5c817ca826" + imagePullPolicy: IfNotPresent + command: + - cilium-agent + args: + - --config-dir=/tmp/cilium/config-map + startupProbe: + httpGet: + host: "127.0.0.1" + path: /healthz + port: 9879 + scheme: HTTP + httpHeaders: + - name: "brief" + value: "true" + failureThreshold: 105 + periodSeconds: 2 + successThreshold: 1 + livenessProbe: + httpGet: + host: "127.0.0.1" + path: /healthz + port: 9879 + scheme: HTTP + httpHeaders: + - name: "brief" + value: "true" + periodSeconds: 30 + successThreshold: 1 + failureThreshold: 10 + timeoutSeconds: 5 + readinessProbe: + httpGet: + host: "127.0.0.1" + path: /healthz + port: 9879 + scheme: HTTP + httpHeaders: + - name: "brief" + value: "true" + periodSeconds: 30 + successThreshold: 1 + failureThreshold: 3 + timeoutSeconds: 5 + env: + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: CILIUM_K8S_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: CILIUM_CLUSTERMESH_CONFIG + value: /var/lib/cilium/clustermesh/ + - name: CILIUM_CNI_CHAINING_MODE + valueFrom: + configMapKeyRef: + name: cilium-config + key: cni-chaining-mode + optional: true + - name: CILIUM_CUSTOM_CNI_CONF + valueFrom: + configMapKeyRef: + name: cilium-config + key: custom-cni-conf + optional: true + - name: KUBERNETES_SERVICE_HOST + value: "api.cluster.local" + - name: KUBERNETES_SERVICE_PORT + value: "6443" + lifecycle: + postStart: + exec: + command: + - "/cni-install.sh" + - "--enable-debug=false" + - "--cni-exclusive=true" + - "--log-file=/var/run/cilium/cilium-cni.log" + preStop: + exec: + command: + - /cni-uninstall.sh + resources: + limits: + cpu: 1 + memory: 1Gi + requests: + cpu: 100m + memory: 128Mi + ports: + - name: peer-service + containerPort: 4244 + hostPort: 4244 + protocol: TCP + - name: prometheus + containerPort: 9962 + hostPort: 9962 + protocol: TCP + - name: envoy-metrics + containerPort: 9964 + hostPort: 9964 + protocol: TCP + securityContext: + seLinuxOptions: + level: 's0' + # Running with spc_t since we have removed the privileged mode. + # Users can change it to a different type as long as they have the + # type available on the system. + type: 'spc_t' + capabilities: + add: + # Use to set socket permission + - CHOWN + # Used to terminate envoy child process + - KILL + # Used since cilium modifies routing tables, etc... + - NET_ADMIN + # Used since cilium creates raw sockets, etc... + - NET_RAW + # Used since cilium monitor uses mmap + - IPC_LOCK + # Used in iptables. Consider removing once we are iptables-free + - SYS_MODULE + # We need it for now but might not need it for >= 5.11 specially + # for the 'SYS_RESOURCE'. + # In >= 5.8 there's already BPF and PERMON capabilities + - SYS_ADMIN + # Could be an alternative for the SYS_ADMIN for the RLIMIT_NPROC + - SYS_RESOURCE + # Both PERFMON and BPF requires kernel 5.8, container runtime + # cri-o >= v1.22.0 or containerd >= v1.5.0. + # If available, SYS_ADMIN can be removed. 
+ #- PERFMON + #- BPF + - DAC_OVERRIDE + - FOWNER + - SETGID + - SETUID + drop: + - ALL + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + # Unprivileged containers need to mount /proc/sys/net from the host + # to have write access + - mountPath: /host/proc/sys/net + name: host-proc-sys-net + # Unprivileged containers need to mount /proc/sys/kernel from the host + # to have write access + - mountPath: /host/proc/sys/kernel + name: host-proc-sys-kernel + - name: bpf-maps + mountPath: /sys/fs/bpf + # Unprivileged containers can't set mount propagation to bidirectional + # in this case we will mount the bpf fs from an init container that + # is privileged and set the mount propagation from host to container + # in Cilium. + mountPropagation: HostToContainer + # Check for duplicate mounts before mounting + - name: cilium-cgroup + mountPath: /sys/fs/cgroup + - name: cilium-run + mountPath: /var/run/cilium + - name: cni-path + mountPath: /host/opt/cni/bin + - name: etc-cni-netd + mountPath: /host/etc/cni/net.d + - name: clustermesh-secrets + mountPath: /var/lib/cilium/clustermesh + readOnly: true + - name: cilium-config-path + mountPath: /tmp/cilium/config-map + readOnly: true + # Needed to be able to load kernel modules + - name: lib-modules + mountPath: /lib/modules + readOnly: true + - name: xtables-lock + mountPath: /run/xtables.lock + initContainers: + # Mount the bpf fs if it is not mounted. We will perform this task + # from a privileged container because the mount propagation bidirectional + # only works from privileged containers. + - name: mount-bpf-fs + image: "quay.io/cilium/cilium:v1.12.3@sha256:30de50c4dc0a1e1077e9e7917a54d5cab253058b3f779822aec00f5c817ca826" + imagePullPolicy: IfNotPresent + args: + - 'mount | grep "/sys/fs/bpf type bpf" || mount -t bpf bpf /sys/fs/bpf' + command: + - /bin/bash + - -c + - -- + terminationMessagePolicy: FallbackToLogsOnError + securityContext: + privileged: true + volumeMounts: + - name: bpf-maps + mountPath: /sys/fs/bpf + mountPropagation: Bidirectional + - name: clean-cilium-state + image: "quay.io/cilium/cilium:v1.12.3@sha256:30de50c4dc0a1e1077e9e7917a54d5cab253058b3f779822aec00f5c817ca826" + imagePullPolicy: IfNotPresent + command: + - /init-container.sh + env: + - name: CILIUM_ALL_STATE + valueFrom: + configMapKeyRef: + name: cilium-config + key: clean-cilium-state + optional: true + - name: CILIUM_BPF_STATE + valueFrom: + configMapKeyRef: + name: cilium-config + key: clean-cilium-bpf-state + optional: true + - name: KUBERNETES_SERVICE_HOST + value: "api.cluster.local" + - name: KUBERNETES_SERVICE_PORT + value: "6443" + terminationMessagePolicy: FallbackToLogsOnError + securityContext: + seLinuxOptions: + level: 's0' + # Running with spc_t since we have removed the privileged mode. + # Users can change it to a different type as long as they have the + # type available on the system. + type: 'spc_t' + capabilities: + # Most of the capabilities here are the same ones used in the + # cilium-agent's container because this container can be used to + # uninstall all Cilium resources, and therefore it is likely that + # will need the same capabilities. + add: + # Used since cilium modifies routing tables, etc... + - NET_ADMIN + # Used in iptables. Consider removing once we are iptables-free + - SYS_MODULE + # We need it for now but might not need it for >= 5.11 specially + # for the 'SYS_RESOURCE'. 
+ # In >= 5.8 there's already BPF and PERMON capabilities + - SYS_ADMIN + # Could be an alternative for the SYS_ADMIN for the RLIMIT_NPROC + - SYS_RESOURCE + # Both PERFMON and BPF requires kernel 5.8, container runtime + # cri-o >= v1.22.0 or containerd >= v1.5.0. + # If available, SYS_ADMIN can be removed. + #- PERFMON + #- BPF + drop: + - ALL + volumeMounts: + - name: bpf-maps + mountPath: /sys/fs/bpf + # Required to mount cgroup filesystem from the host to cilium agent pod + - name: cilium-cgroup + mountPath: /sys/fs/cgroup + mountPropagation: HostToContainer + - name: cilium-run + mountPath: /var/run/cilium + resources: + requests: + cpu: 100m + memory: 100Mi # wait-for-kube-proxy + restartPolicy: Always + priorityClassName: system-node-critical + serviceAccount: "cilium" + serviceAccountName: "cilium" + terminationGracePeriodSeconds: 1 + hostNetwork: true + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + k8s-app: cilium + topologyKey: kubernetes.io/hostname + nodeSelector: + kubernetes.io/os: linux + tolerations: + - operator: Exists + volumes: + # To keep state between restarts / upgrades + - name: cilium-run + hostPath: + path: /var/run/cilium + type: DirectoryOrCreate + # To keep state between restarts / upgrades for bpf maps + - name: bpf-maps + hostPath: + path: /sys/fs/bpf + type: DirectoryOrCreate + # To keep state between restarts / upgrades for cgroup2 filesystem + - name: cilium-cgroup + hostPath: + path: /sys/fs/cgroup + type: DirectoryOrCreate + # To install cilium cni plugin in the host + - name: cni-path + hostPath: + path: /opt/cni/bin + type: DirectoryOrCreate + # To install cilium cni configuration in the host + - name: etc-cni-netd + hostPath: + path: /etc/cni/net.d + type: DirectoryOrCreate + # To be able to load kernel modules + - name: lib-modules + hostPath: + path: /lib/modules + # To access iptables concurrently with other processes (e.g. kube-proxy) + - name: xtables-lock + hostPath: + path: /run/xtables.lock + type: FileOrCreate + # To read the clustermesh configuration + - name: clustermesh-secrets + secret: + secretName: cilium-clustermesh + # note: the leading zero means this number is in octal representation: do not remove it + defaultMode: 0400 + optional: true + # To read the configuration from the config map + - name: cilium-config-path + configMap: + name: cilium-config + - name: host-proc-sys-net + hostPath: + path: /proc/sys/net + type: Directory + - name: host-proc-sys-kernel + hostPath: + path: /proc/sys/kernel + type: Directory +--- +# Source: cilium/templates/cilium-operator/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cilium-operator + namespace: kube-system + labels: + io.cilium/app: operator + name: cilium-operator +spec: + # See docs on ServerCapabilities.LeasesResourceLock in file pkg/k8s/version/version.go + # for more details. 
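+ # A single replica suffices for this deployment. If raised for HA, the
+ # Lease-based leader election granted above (coordination.k8s.io) keeps
+ # exactly one operator instance active at a time.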
+ replicas: 1 + selector: + matchLabels: + io.cilium/app: operator + name: cilium-operator + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 1 + type: RollingUpdate + template: + metadata: + annotations: + labels: + io.cilium/app: operator + name: cilium-operator + spec: + containers: + - name: cilium-operator + image: "quay.io/cilium/operator-generic:v1.12.3@sha256:816ec1da586139b595eeb31932c61a7c13b07fb4a0255341c0e0f18608e84eff" + imagePullPolicy: IfNotPresent + command: + - cilium-operator-generic + args: + - --config-dir=/tmp/cilium/config-map + - --debug=$(CILIUM_DEBUG) + env: + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: CILIUM_K8S_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: CILIUM_DEBUG + valueFrom: + configMapKeyRef: + key: debug + name: cilium-config + optional: true + - name: KUBERNETES_SERVICE_HOST + value: "api.cluster.local" + - name: KUBERNETES_SERVICE_PORT + value: "6443" + livenessProbe: + httpGet: + host: "127.0.0.1" + path: /healthz + port: 9234 + scheme: HTTP + initialDelaySeconds: 60 + periodSeconds: 10 + timeoutSeconds: 3 + volumeMounts: + - name: cilium-config-path + mountPath: /tmp/cilium/config-map + readOnly: true + terminationMessagePolicy: FallbackToLogsOnError + hostNetwork: true + restartPolicy: Always + priorityClassName: system-cluster-critical + serviceAccount: "cilium-operator" + serviceAccountName: "cilium-operator" + # In HA mode, cilium-operator pods must not be scheduled on the same + # node as they will clash with each other. + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + io.cilium/app: operator + topologyKey: kubernetes.io/hostname + nodeSelector: + kubernetes.io/os: linux + tolerations: + - operator: Exists + volumes: + # To read the configuration from the config map + - name: cilium-config-path + configMap: + name: cilium-config diff --git a/exoscale/deployments/cilium.yaml b/exoscale/deployments/cilium.yaml new file mode 100644 index 0000000..e497f9a --- /dev/null +++ b/exoscale/deployments/cilium.yaml @@ -0,0 +1,66 @@ +--- + +k8sServiceHost: "api.cluster.local" +k8sServicePort: "6443" + +operator: + enabled: true + replicas: 1 + prometheus: + enabled: false + +identityAllocationMode: crd +kubeProxyReplacement: strict +enableK8sEndpointSlice: true +localRedirectPolicy: true + +tunnel: "vxlan" +autoDirectNodeRoutes: false +devices: [eth+] + +healthChecking: true + +cni: + install: true + +ipam: + mode: "kubernetes" +k8s: + requireIPv4PodCIDR: true + requireIPv6PodCIDR: true + +bpf: + masquerade: false +ipv4: + enabled: true +ipv6: + enabled: true +hostServices: + enabled: true +hostPort: + enabled: true +nodePort: + enabled: true +externalIPs: + enabled: true +hostFirewall: + enabled: true + +hubble: + enabled: false + +prometheus: + enabled: true + +cgroup: + autoMount: + enabled: false + hostRoot: /sys/fs/cgroup + +resources: + limits: + cpu: 1 + memory: 1Gi + requests: + cpu: 100m + memory: 128Mi diff --git a/exoscale/instances-controlplane.tf b/exoscale/instances-controlplane.tf index 3173190..5b1ce3e 100644 --- a/exoscale/instances-controlplane.tf +++ b/exoscale/instances-controlplane.tf @@ -23,8 +23,18 @@ resource "exoscale_instance_pool" "controlplane" { disk_size = 10 labels = merge(var.tags, { type = "infra" }) + + lifecycle { + ignore_changes = [user_data, labels] + } } +# resource "talos_machine_bootstrap" "controlplane" { +# talos_config = 
talos_client_configuration.talosconfig.talos_config
+# endpoint = [for k, v in var.node_data.controlplanes : k][0]
+# node = [for k, v in var.node_data.controlplanes : k][0]
+# }
+
 resource "local_sensitive_file" "controlplane" {
 for_each = { for idx, name in local.regions : name => idx }
 content = talos_machine_configuration_controlplane.controlplane[each.key].machine_config
diff --git a/exoscale/prepare/network-secgroup.tf b/exoscale/prepare/network-secgroup.tf
index 5df3c2c..27243b7 100644
--- a/exoscale/prepare/network-secgroup.tf
+++ b/exoscale/prepare/network-secgroup.tf
@@ -77,6 +77,16 @@ resource "exoscale_security_group_rule" "controlplane_api" {
 end_port = 6443
 }
 
+resource "exoscale_security_group_rule" "controlplane_api_health" {
+ security_group_id = exoscale_security_group.controlplane.id
+ description = "controlplane api health"
+ type = "INGRESS"
+ protocol = "TCP"
+ cidr = "0.0.0.0/0"
+ start_port = 6443
+ end_port = 6443
+}
+
 resource "exoscale_security_group_rule" "controlplane_talos" {
 for_each = { for idx, ip in var.whitelist_admin : ip => idx }
 security_group_id = exoscale_security_group.controlplane.id
diff --git a/exoscale/talos.tf b/exoscale/talos.tf
index 7fea211..0733b2b 100644
--- a/exoscale/talos.tf
+++ b/exoscale/talos.tf
@@ -12,6 +12,7 @@ resource "talos_machine_configuration_controlplane" "controlplane" {
 examples_enabled = false
 config_patches = [
 templatefile("${path.module}/templates/controlplane.yaml.tpl", merge(var.kubernetes, {
+ nodeSubnets = local.network[each.key].cidr
 ipv4_local_vip = cidrhost(local.network[each.key].cidr, 5)
 labels = "topology.kubernetes.io/region=${each.key},topology.kubernetes.io/zone=${each.key},node.kubernetes.io/instance-type=${try(var.controlplane[each.key].type, "standard.tiny")}"
 }))
@@ -27,8 +28,37 @@ resource "talos_machine_configuration_worker" "worker" {
 examples_enabled = false
 config_patches = [
 templatefile("${path.module}/templates/worker.yaml.tpl", merge(var.kubernetes, {
+ nodeSubnets = local.network[each.key].cidr
 ipv4_local_vip = cidrhost(local.network[each.key].cidr, 5)
 labels = "topology.kubernetes.io/region=${each.key},topology.kubernetes.io/zone=${each.key}"
 }))
 ]
 }
+
+resource "talos_client_configuration" "talosconfig" {
+ for_each = { for idx, name in local.regions : name => idx if try(var.controlplane[name].count, 0) > 0 }
+ cluster_name = var.kubernetes["clusterName"]
+ machine_secrets = talos_machine_secrets.talos.machine_secrets
+ endpoints = [for k, v in exoscale_instance_pool.controlplane[each.key].instances : k.public_ip_address]
+}
+
+resource "local_sensitive_file" "talosconfig" {
+ for_each = { for idx, name in local.regions : name => idx if try(var.controlplane[name].count, 0) > 0 }
+ content = talos_client_configuration.talosconfig[each.key].talos_config
+ filename = "_cfgs/talosconfig-${each.key}"
+ file_permission = "0600"
+}
+
+resource "talos_cluster_kubeconfig" "kubeconfig" {
+ for_each = { for idx, name in local.regions : name => idx if try(var.controlplane[name].count, 0) > 0 }
+ talos_config = talos_client_configuration.talosconfig[each.key].talos_config
+ endpoint = [for k, v in exoscale_instance_pool.controlplane[each.key].instances : k.public_ip_address][0]
+ node = [for k, v in exoscale_instance_pool.controlplane[each.key].instances : k.public_ip_address][0]
+}
+
+resource "local_sensitive_file" "kubeconfig" {
+ for_each = { for idx, name in local.regions : name => idx if try(var.controlplane[name].count, 0) > 0 }
+ content = talos_cluster_kubeconfig.kubeconfig[each.key].kube_config
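+ # local_sensitive_file (vs. plain local_file) marks the kubeconfig as
+ # sensitive, so Terraform redacts it from plan/apply output.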
filename = "_cfgs/kubeconfig-${each.key}" + file_permission = "0600" +} diff --git a/exoscale/templates/controlplane.yaml.tpl b/exoscale/templates/controlplane.yaml.tpl index 99e6303..4bc85cb 100644 --- a/exoscale/templates/controlplane.yaml.tpl +++ b/exoscale/templates/controlplane.yaml.tpl @@ -44,6 +44,10 @@ machine: cluster: network: dnsDomain: ${domain} + cni: + name: custom + urls: + - https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/exoscale/deployments/cilium-result.yaml proxy: disabled: true controllerManager: diff --git a/exoscale/variables.tf b/exoscale/variables.tf index 7364b75..7d38f52 100644 --- a/exoscale/variables.tf +++ b/exoscale/variables.tf @@ -31,7 +31,7 @@ variable "controlplane" { default = { "de-fra-1" = { count = 0, - type = "standard.tiny", + type = "standard.small", }, "de-muc-1" = { count = 0, @@ -64,7 +64,6 @@ variable "kubernetes" { default = { podSubnets = "10.32.0.0/12,fd40:10:32::/102" serviceSubnets = "10.200.0.0/22,fd40:10:200::/112" - nodeSubnets = "192.168.0.0/16" domain = "cluster.local" apiDomain = "api.cluster.local" clusterName = "talos-k8s-exoscale"