diff --git a/conditional.tf b/conditional.tf
index c2ad7d6..abd5331 100644
--- a/conditional.tf
+++ b/conditional.tf
@@ -40,6 +40,22 @@ locals {
     if var.networking == "calico"
   }
 
+  # cilium manifests map
+  # { manifests-networking/manifest.yaml => content }
+  cilium_manifests = {
+    for name in fileset("${path.module}/resources/cilium", "**/*.yaml") :
+    "manifests-networking/${name}" => templatefile(
+      "${path.module}/resources/cilium/${name}",
+      {
+        cilium_agent_image    = var.container_images["cilium_agent"]
+        cilium_operator_image = var.container_images["cilium_operator"]
+        pod_cidr              = var.pod_cidr
+        daemonset_tolerations = var.daemonset_tolerations
+      }
+    )
+    if var.networking == "cilium"
+  }
+
   # kube-router manifests map
   # { manifests-networking/manifest.yaml => content }
   kube_router_manifests = {
diff --git a/outputs.tf b/outputs.tf
index 93dbb87..a742983 100644
--- a/outputs.tf
+++ b/outputs.tf
@@ -29,6 +29,7 @@ output "assets_dist" {
     local.flannel_manifests,
     local.calico_manifests,
     local.kube_router_manifests,
+    local.cilium_manifests,
   )
   sensitive = true
 }
diff --git a/resources/cilium/cluster-role-binding.yaml b/resources/cilium/cluster-role-binding.yaml
new file mode 100644
index 0000000..6133d05
--- /dev/null
+++ b/resources/cilium/cluster-role-binding.yaml
@@ -0,0 +1,27 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: cilium-operator
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: cilium-operator
+subjects:
+- kind: ServiceAccount
+  name: cilium-operator
+  namespace: kube-system
+
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: cilium-agent
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: cilium-agent
+subjects:
+- kind: ServiceAccount
+  name: cilium-agent
+  namespace: kube-system
+
diff --git a/resources/cilium/cluster-role.yaml b/resources/cilium/cluster-role.yaml
new file mode 100644
index 0000000..f29912c
--- /dev/null
+++ b/resources/cilium/cluster-role.yaml
@@ -0,0 +1,136 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: cilium-operator
+rules:
+- apiGroups:
+  - ""
+  resources:
+  # to automatically delete [core|kube]dns pods so that they start being
+  # managed by Cilium
+  - pods
+  verbs:
+  - get
+  - list
+  - watch
+  - delete
+- apiGroups:
+  - discovery.k8s.io
+  resources:
+  - endpointslices
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  # to perform the translation of a CNP that contains `ToGroup` to its endpoints
+  - services
+  - endpoints
+  # to check apiserver connectivity
+  - namespaces
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - apiextensions.k8s.io
+  resources:
+  - customresourcedefinitions
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - cilium.io
+  resources:
+  - ciliumnetworkpolicies
+  - ciliumnetworkpolicies/status
+  - ciliumclusterwidenetworkpolicies
+  - ciliumclusterwidenetworkpolicies/status
+  - ciliumendpoints
+  - ciliumendpoints/status
+  - ciliumnodes
+  - ciliumnodes/status
+  - ciliumidentities
+  - ciliumidentities/status
+  verbs:
+  - '*'
+
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: cilium-agent
+rules:
+- apiGroups:
+  - networking.k8s.io
+  resources:
+  - networkpolicies
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - discovery.k8s.io
+  resources:
+  - endpointslices
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - namespaces
+  - services
+  - nodes
+  - endpoints
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - pods
+  - nodes
+  verbs:
+  - get
+  - list
+  - watch
+  - update
+- apiGroups:
+  - ""
+  resources:
+  - nodes
+  - nodes/status
+  verbs:
+  - patch
+- apiGroups:
+  - apiextensions.k8s.io
+  resources:
+  - customresourcedefinitions
+  verbs:
+  - create
+  - get
+  - list
+  - watch
+  - update
+- apiGroups:
+  - cilium.io
+  resources:
+  - ciliumnetworkpolicies
+  - ciliumnetworkpolicies/status
+  - ciliumclusterwidenetworkpolicies
+  - ciliumclusterwidenetworkpolicies/status
+  - ciliumendpoints
+  - ciliumendpoints/status
+  - ciliumnodes
+  - ciliumnodes/status
+  - ciliumidentities
+  - ciliumidentities/status
+  verbs:
+  - '*'
+
diff --git a/resources/cilium/config.yaml b/resources/cilium/config.yaml
new file mode 100644
index 0000000..98de7ae
--- /dev/null
+++ b/resources/cilium/config.yaml
@@ -0,0 +1,175 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: cilium
+  namespace: kube-system
+data:
+  # Identity allocation mode selects how identities are shared between cilium
+  # nodes by setting how they are stored. The options are "crd" or "kvstore".
+  # - "crd" stores identities in kubernetes as CRDs (custom resource definitions).
+  #   These can be queried with:
+  #     kubectl get ciliumid
+  # - "kvstore" stores identities in a kvstore, etcd or consul, that is
+  #   configured below. Cilium versions before 1.6 supported only the kvstore
+  #   backend. Upgrades from these older cilium versions should continue using
+  #   the kvstore by commenting out the identity-allocation-mode below, or
+  #   setting it to "kvstore".
+  identity-allocation-mode: crd
+
+  # identity-change-grace-period is the grace period that needs to pass
+  # before an endpoint that has changed its identity will start using
+  # that new identity. During the grace period, the new identity has
+  # already been allocated and other nodes in the cluster have a chance
+  # to whitelist the new upcoming identity of the endpoint.
+  identity-change-grace-period: "5s"
+
+  # If you want to run cilium in debug mode, change this value to true
+  debug: "false"
+
+  # TCP liveness and readiness probes (prefer exec probe for now)
+  agent-health-port: "9876"
+
+  # Prometheus
+  # enable-metrics: "true"
+  # prometheus-serve-addr: ":foo"
+  # operator-prometheus-serve-addr: ":bar"
+
+  # Enable IPv4 addressing. If enabled, all endpoints are allocated an IPv4
+  # address.
+  enable-ipv4: "true"
+
+  # Enable IPv6 addressing. If enabled, all endpoints are allocated an IPv6
+  # address.
+  enable-ipv6: "false"
+
+  # Enable probing for a more efficient clock source for the BPF datapath
+  enable-bpf-clock-probe: "true"
+
+  # If you want cilium monitor to aggregate tracing for packets, set this level
+  # to "low", "medium", or "maximum". The higher the level, the fewer packets
+  # will be seen in monitor output.
+  monitor-aggregation: medium
+
+  # The monitor aggregation interval governs the typical time between monitor
+  # notification events for each allowed connection.
+  #
+  # Only effective when monitor aggregation is set to "medium" or higher.
+  monitor-aggregation-interval: 5s
+
+  # The monitor aggregation flags determine which TCP flags, upon the first
+  # observation, cause monitor notifications to be generated.
+  #
+  # Only effective when monitor aggregation is set to "medium" or higher.
+  monitor-aggregation-flags: all
+
+  # bpf-policy-map-max specifies the maximum number of entries in the endpoint
+  # policy map (per endpoint)
+  bpf-policy-map-max: "16384"
+
+  # Specifies the ratio (0.0-1.0) of total system memory to use for dynamic
+  # sizing of the TCP CT, non-TCP CT, NAT and policy BPF maps.
+  bpf-map-dynamic-size-ratio: "0.0025"
+
+  # Pre-allocation of map entries allows per-packet latency to be reduced, at
+  # the expense of up-front memory allocation for the entries in the maps. The
+  # default value below will minimize memory usage in the default installation;
+  # users who are sensitive to latency may consider setting this to "true".
+  #
+  # This option was introduced in Cilium 1.4. Cilium 1.3 and earlier ignore
+  # this option and behave as though it is set to "true".
+  #
+  # If this value is modified, then during the next Cilium startup the restore
+  # of existing endpoints and tracking of ongoing connections may be disrupted.
+  # This may lead to policy drops or a change in load balancing decisions for a
+  # connection for some time. Endpoints may need to be recreated to restore
+  # connectivity.
+  #
+  # If this option is set to "false" during an upgrade from 1.3 or earlier to
+  # 1.4 or later, then it may cause one-time disruptions during the upgrade.
+  preallocate-bpf-maps: "false"
+
+  # Encapsulation mode for communication between nodes
+  # Possible values:
+  #   - disabled
+  #   - vxlan (default)
+  #   - geneve
+  tunnel: vxlan
+
+  # Name of the cluster. Only relevant when building a mesh of clusters.
+  cluster-name: default
+
+  # DNS polling periodically issues a DNS lookup for each `matchName` from
+  # cilium-agent. The result is used to regenerate endpoint policy.
+  # DNS lookups are repeated with an interval of 5 seconds, and are made for
+  # A (IPv4) and AAAA (IPv6) addresses. Should a lookup fail, the most recent IP
+  # data is used instead. An IP change will trigger a regeneration of the Cilium
+  # policy for each endpoint and increment the per cilium-agent policy
+  # repository revision.
+  #
+  # This option is disabled by default starting from version 1.4.x in favor
+  # of a more powerful DNS proxy-based implementation, see [0] for details.
+  # Enable this option if you want to use FQDN policies but do not want to use
+  # the DNS proxy.
+  #
+  # To ease upgrade, users may opt to set this option to "true".
+  # Otherwise please refer to the Upgrade Guide [1] which explains how to
+  # prepare policy rules for upgrade.
+  #
+  # [0] http://docs.cilium.io/en/stable/policy/language/#dns-based
+  # [1] http://docs.cilium.io/en/stable/install/upgrade/#changes-that-may-require-action
+  tofqdns-enable-poller: "false"
+
+  # wait-bpf-mount makes the init container wait until the bpf filesystem is mounted
+  wait-bpf-mount: "false"
+
+  auto-direct-node-routes: "false"
+
+  # enable-xt-socket-fallback enables the fallback compatibility solution
+  # when the xt_socket kernel module is missing and it is needed for
+  # the datapath L7 redirection to work properly. See documentation
+  # for details on when this can be disabled:
+  # http://docs.cilium.io/en/latest/install/system_requirements/#admin-kernel-version.
+  enable-xt-socket-fallback: "true"
+
+  # install-iptables-rules enables installation of iptables rules to allow for
+  # TPROXY (L7 proxy injection), iptables-based masquerading and compatibility
+  # with kube-proxy. See documentation for details on when this can be
+  # disabled.
+  install-iptables-rules: "true"
+
+  # Masquerade traffic leaving the node destined for outside
+  masquerade: "true"
+  # enable-bpf-masquerade enables masquerading with BPF instead of iptables
+  enable-bpf-masquerade: "true"
+
+  # kube-proxy
+  kube-proxy-replacement: "probe"
+  enable-session-affinity: "false"
+
+  # ClusterIPs from host namespace
+  enable-host-reachable-services: "false"
+
+  # NodePort
+  enable-node-port: "true"
+  node-port-bind-protection: "true"
+  enable-auto-protect-node-port-range: "true"
+
+  # IPAM
+  ipam: "cluster-pool"
+  disable-cnp-status-updates: "true"
+  k8s-require-ipv4-pod-cidr: "true"
+  k8s-require-ipv6-pod-cidr: "false"
+  cluster-pool-ipv4-cidr: "${pod_cidr}"
+  cluster-pool-ipv4-mask-size: "24"
+
+  # Health
+  enable-endpoint-health-checking: "true"
+
+  # Identity
+  enable-well-known-identities: "false"
+  enable-remote-node-identity: "true"
+
+  # Misc
+  # enable-l7-proxy: "false"
+  policy-audit-mode: "false"
+  operator-api-serve-addr: "127.0.0.1:9234"
diff --git a/resources/cilium/daemonset.yaml b/resources/cilium/daemonset.yaml
new file mode 100644
index 0000000..0c77e03
--- /dev/null
+++ b/resources/cilium/daemonset.yaml
@@ -0,0 +1,175 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: cilium-agent
+  namespace: kube-system
+  labels:
+    k8s-app: cilium-agent
+spec:
+  selector:
+    matchLabels:
+      k8s-app: cilium-agent
+  updateStrategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxUnavailable: 1
+  template:
+    metadata:
+      labels:
+        k8s-app: cilium-agent
+      annotations:
+        seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
+    spec:
+      hostNetwork: true
+      priorityClassName: system-node-critical
+      serviceAccountName: cilium-agent
+      tolerations:
+      - key: node-role.kubernetes.io/controller
+        operator: Exists
+      - key: node.kubernetes.io/not-ready
+        operator: Exists
+      %{~ for key in daemonset_tolerations ~}
+      - key: ${key}
+        operator: Exists
+      %{~ endfor ~}
+      initContainers:
+      - name: clean-cilium-state
+        image: ${cilium_agent_image}
+        command:
+        - /init-container.sh
+        env:
+        - name: CILIUM_WAIT_BPF_MOUNT
+          valueFrom:
+            configMapKeyRef:
+              name: cilium
+              key: wait-bpf-mount
+              optional: true
+        securityContext:
+          capabilities:
+            add:
+            - NET_ADMIN
+          privileged: true
+        volumeMounts:
+        - name: sys-fs-bpf
+          mountPath: /sys/fs/bpf
+          mountPropagation: HostToContainer
+        - name: var-run-cilium
+          mountPath: /var/run/cilium
+      containers:
+      - name: cilium-agent
+        image: ${cilium_agent_image}
+        command:
+        - cilium-agent
+        args:
+        - --config-dir=/tmp/cilium/config-map
+        env:
+        - name: K8S_NODE_NAME
+          valueFrom:
+            fieldRef:
+              apiVersion: v1
+              fieldPath: spec.nodeName
+        - name: CILIUM_K8S_NAMESPACE
+          valueFrom:
+            fieldRef:
+              apiVersion: v1
+              fieldPath: metadata.namespace
+        ports:
+        # Not yet used, prefer exec probes
+        - name: health
+          protocol: TCP
+          containerPort: 9876
+        lifecycle:
+          # Install Cilium CNI binary and CNI network config
+          postStart:
+            exec:
+              command:
+              - "/cni-install.sh"
+              - "--enable-debug=false"
+          preStop:
+            exec:
+              command:
+              - /cni-uninstall.sh
+        securityContext:
+          capabilities:
+            add:
+            - NET_ADMIN
+            - SYS_MODULE
+          privileged: true
+        livenessProbe:
+          exec:
+            command:
+            - cilium
+            - status
+            - --brief
+          periodSeconds: 30
+          initialDelaySeconds: 120
+          successThreshold: 1
+          failureThreshold: 10
+          timeoutSeconds: 5
+        readinessProbe:
+          exec:
+            command:
+            - cilium
+            - status
+            - --brief
+          periodSeconds: 20
+          initialDelaySeconds: 5
+          successThreshold: 1
+          failureThreshold: 3
+          timeoutSeconds: 5
+        volumeMounts:
+        # Load kernel modules
+        - name: lib-modules
+          mountPath: /lib/modules
+          readOnly: true
+        - name: xtables-lock
+          mountPath: /run/xtables.lock
+        # Keep state between restarts
+        - name: var-run-cilium
+          mountPath: /var/run/cilium
+        - name: sys-fs-bpf
+          mountPath: /sys/fs/bpf
+        # Configuration
+        - name: config
+          mountPath: /tmp/cilium/config-map
+          readOnly: true
+        # Install CNI plugin and config on host
+        - name: cni-bin-dir
+          mountPath: /host/opt/cni/bin
+        - name: cni-conf-dir
+          mountPath: /host/etc/cni/net.d
+      terminationGracePeriodSeconds: 1
+      volumes:
+      # Load kernel modules
+      - name: lib-modules
+        hostPath:
+          path: /lib/modules
+      # Access iptables concurrently with other processes (e.g. kube-proxy)
+      - name: xtables-lock
+        hostPath:
+          type: FileOrCreate
+          path: /run/xtables.lock
+      # Keep state between restarts
+      - name: var-run-cilium
+        hostPath:
+          path: /var/run/cilium
+          type: DirectoryOrCreate
+      # Keep state between restarts for bpf maps
+      - name: sys-fs-bpf
+        hostPath:
+          path: /sys/fs/bpf
+          type: DirectoryOrCreate
+      # Read configuration
+      - name: config
+        configMap:
+          name: cilium
+      # Install CNI plugin and config on host
+      - name: cni-bin-dir
+        hostPath:
+          type: DirectoryOrCreate
+          path: /opt/cni/bin
+      - name: cni-conf-dir
+        hostPath:
+          type: DirectoryOrCreate
+          path: /etc/kubernetes/cni/net.d
+
diff --git a/resources/cilium/deployment.yaml b/resources/cilium/deployment.yaml
new file mode 100644
index 0000000..0be972e
--- /dev/null
+++ b/resources/cilium/deployment.yaml
@@ -0,0 +1,77 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: cilium-operator
+  namespace: kube-system
+spec:
+  replicas: 1
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxUnavailable: 1
+  selector:
+    matchLabels:
+      name: cilium-operator
+  template:
+    metadata:
+      labels:
+        name: cilium-operator
+      annotations:
+        seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
+    spec:
+      hostNetwork: true
+      priorityClassName: system-cluster-critical
+      serviceAccountName: cilium-operator
+      tolerations:
+      - key: node-role.kubernetes.io/controller
+        operator: Exists
+      - key: node.kubernetes.io/not-ready
+        operator: Exists
+      containers:
+      - name: cilium-operator
+        image: ${cilium_operator_image}
+        command:
+        - cilium-operator-generic
+        args:
+        - --config-dir=/tmp/cilium/config-map
+        - --debug=$(CILIUM_DEBUG)
+        env:
+        - name: K8S_NODE_NAME
+          valueFrom:
+            fieldRef:
+              apiVersion: v1
+              fieldPath: spec.nodeName
+        - name: CILIUM_K8S_NAMESPACE
+          valueFrom:
+            fieldRef:
+              apiVersion: v1
+              fieldPath: metadata.namespace
+        - name: CILIUM_DEBUG
+          valueFrom:
+            configMapKeyRef:
+              name: cilium
+              key: debug
+              optional: true
+        ports:
+        - name: health
+          protocol: TCP
+          containerPort: 9234
+        livenessProbe:
+          httpGet:
+            scheme: HTTP
+            host: 127.0.0.1
+            port: 9234
+            path: /healthz
+          initialDelaySeconds: 60
+          periodSeconds: 10
+          timeoutSeconds: 3
+        volumeMounts:
+        - name: config
+          mountPath: /tmp/cilium/config-map
+          readOnly: true
+      volumes:
+      # Read configuration
+      - name: config
+        configMap:
+          name: cilium
+
diff --git a/resources/cilium/service-account.yaml b/resources/cilium/service-account.yaml
new file mode 100644
index 0000000..5b551c9
--- /dev/null
+++ b/resources/cilium/service-account.yaml
@@ -0,0 +1,13 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: cilium-operator
+  namespace: kube-system
+
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: cilium-agent
+  namespace: kube-system
+
diff --git a/variables.tf b/variables.tf
index a5e6a1c..0401296 100644
--- a/variables.tf
+++ b/variables.tf
@@ -27,7 +27,7 @@ variable "cloud_provider" {
 
 variable "networking" {
   type        = string
-  description = "Choice of networking provider (flannel or calico or kube-router)"
+  description = "Choice of networking provider (flannel or calico or kube-router or cilium)"
   default     = "flannel"
 }
 
@@ -80,7 +80,9 @@ variable "container_images" {
     kube_scheduler = "k8s.gcr.io/kube-scheduler:v1.18.4"
     kube_proxy     = "k8s.gcr.io/kube-proxy:v1.18.4"
     # experimental
-    kube_router = "cloudnativelabs/kube-router:v0.3.2"
+    kube_router     = "cloudnativelabs/kube-router:v0.3.2"
+    cilium_agent    = "docker.io/cilium/cilium:v1.8.0-rc4"
+    cilium_operator = "docker.io/cilium/operator-generic:v1.8.0-rc4"
   }
 }
 
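
For illustration, a minimal sketch of a caller opting into the new networking mode. The module source path and the elided arguments are assumptions for the example, not part of this change; only networking, pod_cidr, and the container_images defaults come from the diff above.

module "bootstrap" {
  # Illustrative path; point this at wherever this module is vendored.
  source = "./terraform-render-bootstrap"

  # ... other required cluster arguments elided ...

  # Select Cilium as the CNI provider (the default remains "flannel").
  networking = "cilium"

  # Example value; templated into cluster-pool-ipv4-cidr in resources/cilium/config.yaml.
  pod_cidr = "10.2.0.0/16"
}

With networking = "cilium", the cilium_manifests local renders every template under resources/cilium via fileset and templatefile into a map keyed by manifests-networking/<name>, which outputs.tf merges into assets_dist alongside the flannel, calico, and kube-router manifests.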