feat(nuclear)!: L3 routed 10G pod network

Move from a single flat 1G L2 network to two networks:
- a 10G star inter-node k8s network (PodCIDR, kubelet, etcd, Cilium);
  nodes are linked over this network with OpenFabric, and Cilium adds a Geneve overlay on top
- a per-node /30 to the upstream router (LB IPs, clients, rest of the network);
  nodes peer with the router over OSPF
An illustrative addressing sketch follows below.
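A minimal sketch of the resulting addressing, with purely illustrative RFC 1918 prefixes (the real values stay in the SOPS-encrypted `${IP_VLAN_*}`/`${IP_K8S_*}` variables); the `.1` node / `.2` router split per /30 and the VLAN IDs match the Talos config in this commit:

```yaml
# Illustrative only — real prefixes are SOPS-encrypted.
blackfish:
  loopback: 10.69.0.1/32     # ${IP_K8S_BLACKFISH}, advertised over the 10G star via OpenFabric
  vlan883:  10.69.83.1/30    # ${IP_VLAN_BLACKFISH}1, /30 point-to-point to the router
  gateway:  10.69.83.2       # ${IP_VLAN_BLACKFISH}2, upstream router + OSPF neighbour
humming:
  loopback: 10.69.0.2/32     # ${IP_K8S_HUMMING}
  vlan882:  10.69.82.1/30    # ${IP_VLAN_HUMMING}1
  gateway:  10.69.82.2       # ${IP_VLAN_HUMMING}2
```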
JJGadgets
2023-07-07 17:57:15 +08:00
parent 748f0e3101
commit de84fdc626
21 changed files with 734 additions and 72 deletions

View File

@@ -31,6 +31,9 @@ tasks:
vars:
C: '{{ or .C (fail "Missing C environment variable for cluster!") }}'
cmds:
- kubectl delete configmap -n kube-system cilium-config || true
- kubectl delete daemonset -n kube-system cilium || true
- kubectl delete deployment -n kube-system cilium-operator || true
- task: k8s:sops-apply
vars:
C: '{{.C | default "biohazard"}}'

View File

@@ -11,19 +11,21 @@ USERS_2_NAME=ENC[AES256_GCM,data:pUEVSALW2LY=,iv:j6zAgL+Z35H1t5RAA6BK9xk99UuDP+B
USERS_3_UID=ENC[AES256_GCM,data:o8/3bQ==,iv:YoXHve6RHidEJ6nJMqOEHGtK2FeH3wHBwYUMxDAfwK0=,tag:jFE66SO+b6xyetCrxd8Zwg==,type:str]
USERS_3_ID=ENC[AES256_GCM,data:uWqHb+s=,iv:j7M/X2+tyLD5qyoulaH9urRoy7zg74UMAzAr8i+TU24=,tag:w93RZByuEOfC39nC7I5wlw==,type:str]
USERS_3_NAME=ENC[AES256_GCM,data:BxSWnRnQwXfHqg==,iv:JmzuZmZZnuQnhI9SYt1TBmBLojmm/gI0ttaLUfXrXpw=,tag:59diPvcbD4Lexj4PVpO3Ig==,type:str]
ASN_CLUSTER=ENC[AES256_GCM,data:9Gp8S5Y=,iv:t0uC5Kkqajpg/LWHNuR6Vio/m7H1Cn+2eYadde6edHM=,tag:b0MktfZ6TP5QYraTtmBrFw==,type:str]
ASN_ROUTER=ENC[AES256_GCM,data:eMc15d8=,iv:6YU/Uet01CLO7H4PYLNZjKPhI/QuXeP/tOzyAMhkPYE=,tag:E9+jAB7qbMEbRZqebkt6Vg==,type:str]
ASN_CLUSTER=ENC[AES256_GCM,data:wn1SX5w=,iv:MgRKQER5ywSoSzWCMMSY3WqKuT3KYY/1FVUzGrz3GZk=,tag:Nu+ZcEfA+6xH5n8UFYd3GQ==,type:str]
ASN_ROUTER=ENC[AES256_GCM,data:sgCa3Sk=,iv:/XWKgNMwZkznrYiVgCcDsWZp3GjQXd6gz+bcT7RI+Os=,tag:YU6F1U5YEXi2c4jfR5wkiQ==,type:str]
ASN_EC2_INGRESS=ENC[AES256_GCM,data:wLsuBoM=,iv:LEgE9JaYszsY62EGXWHF6IrqKH15JrIeJZl73ZDhKtY=,tag:nblIhZueKlV49Xh8JvPyXA==,type:str]
IP_ROUTER_LAN=ENC[AES256_GCM,data:q+9MIIuBLPA=,iv:pzWM3e0qgyRLgYtXv3aoKqX6ZOnpQURGBWaLZZRfQGc=,tag:xEiU2fV3Wt0YHd60hALsUQ==,type:str]
IP_ROUTER_LAN_CIDR=ENC[AES256_GCM,data:VBNZEYACQMQduOU=,iv:is1RkkLkgUYuNPypTFRm7krP9nb1rkrZ64pkQT+5LEM=,tag:opkUbEo8JR1Gp13pklKz7g==,type:str]
IP_K8S_BLACKFISH=ENC[AES256_GCM,data:GxRUuno8nznh,iv:9+81Ymm5wJKyjbncUulpRRuBLLPn13i+jr5ECZLK3pA=,tag:Y3LncpV8exxiQue6yXfhFg==,type:str]
IP_K8S_HUMMING=ENC[AES256_GCM,data:Tvm/Ur3F3Lw5,iv:JEnMlxfhsmv6jh1q+wJPMlU9u/71zNv6JFzhOQPA1Tg=,tag:frSlUlib4CVb0idrUgTMHg==,type:str]
IP_ROUTER_VLAN_K8S=ENC[AES256_GCM,data:BF7rMLUGyiMb,iv:H+s1v1sl6ZNJEvF1QO5kIYE7jquhLrDXbPnpE2PywUY=,tag:Sux+8RhfEHfZDXT2z4S5Jw==,type:str]
IP_ROUTER_VLAN_K8S_CIDR=ENC[AES256_GCM,data:ofSpO4zPW15NjV5U,iv:NiFUvxTyLkN6pamnvvdDp4jrvIDyjUL29iytz6WtQ5o=,tag:J3EfAU0XGsyLM3LyJhqUXA==,type:str]
IP_ROUTER_VLAN_K8S_PREFIX=ENC[AES256_GCM,data:abED1u8guh0=,iv:Po4vQtJTEfBOFItiFzGp1F0YosLpYn97MBuRpEoHNEc=,tag:5RHzXYXjHbPdy8vLXsMM5w==,type:str]
IP_VLAN_BLACKFISH=ENC[AES256_GCM,data:wiVRuonuaoqL,iv:lseWVabezSqniU8ncWoZpxtCmFrm9wJJoPi52WYA7co=,tag:4kIj2ruVmUM44OMu/EY6ww==,type:str]
IP_VLAN_HUMMING=ENC[AES256_GCM,data:2lL0ykXl15We,iv:PK4u09416w41bjrsEZVxAB4xA6jJDU01QbFkqqOxE3I=,tag:VRiy/gjaeY3SF69rwFHG3g==,type:str]
IP_ROUTER_VLAN_K8S=ENC[AES256_GCM,data:Z+H3tR1U+MmC,iv:Rl11kpvATZrjv45C9RLPJFoGweeM352c1o/qJkUMf6A=,tag:aJl9yEaZyGXY12JH6dPdQQ==,type:str]
IP_ROUTER_VLAN_K8S_CIDR=ENC[AES256_GCM,data:BtcBqN2Hxl/2oTDt,iv:1YL9YjMHc1qA5J2bEkdWM6AVJjiDmnlR3X4mtYd4aAM=,tag:0BhGSz34C/Ljh2xpaGRLaQ==,type:str]
IP_ROUTER_VLAN_K8S_PREFIX=ENC[AES256_GCM,data:cy9mQoDa9G0=,iv:9VksTTvKmyimYJkHFS2cGuUOYkT4gpf/CozFEuJ8Nfg=,tag:DttdwyYJyKA2dCDThxBikw==,type:str]
IP_K8S_BLACKFISH=ENC[AES256_GCM,data:0xrxPf7ZaGM2,iv:9lB7hHZ8PEOViL9VQTy9AWvVkghEvdYIPkFeAw/n0pc=,tag:Cf6nTauqLx92mehA9SOiSw==,type:str]
IP_K8S_HUMMING=ENC[AES256_GCM,data:YRFHEf22lp7q,iv:xi0YAQRiebOqADNsN5yNacZkBElA/LTq89LgvbdZn8s=,tag:hUE9x0WmzUpd58tZFC6trA==,type:str]
IP_WG_USER_1_V4=ENC[AES256_GCM,data:6kwe/D0YVGEG7CMWhr8=,iv:B4Dk4AaljCym/cxatpO/5WMZ2E4KMiNH+tCLH+yVsf8=,tag:nqqze7vKrsWTBWG5/Ou/Ag==,type:str]
IP_WG_GUEST_V4=ENC[AES256_GCM,data:zNwOAgzou0T8cAduDBY=,iv:matZ/IhxDQ+CGO3IelqlszVfmAr12dgWXIH9YLGGDOs=,tag:/MJRFYmH69ldrHfdjSQSpA==,type:str]
IP_CLUSTER_VIP=ENC[AES256_GCM,data:cs5dy2t7T+eX,iv:CutO3Djkyf1P5+HkkgHAlv5zCYEzrwumLlYC+tLCUtg=,tag:R+VTeZCVI/K8sIp64c/7lQ==,type:str]
IP_CLUSTER_VIP=ENC[AES256_GCM,data:MNeXioeLb5V6,iv:q5y+X1OUn0NNubISgD3fftMjwX7D3JuiRkPAQaDOn40=,tag:l0BJ2o5O/leFLyWl5MMqqw==,type:str]
IP_POD_CIDR_V4=ENC[AES256_GCM,data:FmWuihOESnZkCKL4Nw==,iv:/gzFG6EXHQbxUXY8sUlBHgfrc6CznxWTEKkwpvNXnZ4=,tag:Fd+EHKVQL2NqWceu47pTpQ==,type:str]
IP_SVC_CIDR_V4=ENC[AES256_GCM,data:izGONZgkmYdt5//n0w==,iv:Xz0/5ZkNsT/weEjPqsGYmVq0CEyioJzXxLPlpyvwwH0=,tag:f1WG/GSvCcCDOMRkH/C80A==,type:str]
IP_LB_CIDR=ENC[AES256_GCM,data:IEDyGQUWrkG9LLUEG2s=,iv:9f143ZAPjlk/qrXj6a4fjQoAwTHGcinBCzPcbdhvkg4=,tag:pc+tuo8/7ftgmifnxgSdMw==,type:str]
@@ -124,12 +126,12 @@ CONFIG_ZEROTIER_ENDPOINT=ENC[AES256_GCM,data:tOyIlrzdn8sck7um7OSicq5T0XWAmymaRLn
CONFIG_AUTHENTIK_REMOTE_HOST=ENC[AES256_GCM,data:K0SOweZKXdAhlKQrZQ42UQ3Eg7u3KTF5nCemjkz2XA==,iv:65Qret7KcDIZRoLzRwoY6sXGNUGfrhsMugJ+jeO5O6Y=,tag:qkv/mksEiPuMtylKvIs/Yw==,type:str]
CONFIG_HEADSCALE_IPV4=ENC[AES256_GCM,data:EZ7GMHA6u1wWPS5g6Pg=,iv:W1hcseQ4Q6CisTXnDLI7hWTy18fIVKtZ46tudCyhfa4=,tag:2WnnNjuZhwUPG07OKTQt2g==,type:str]
CONFIG_OVENMEDIAENGINE_NAME=ENC[AES256_GCM,data:58CuH8bcUHWXBZA=,iv:BN7x6aAJPbzIn25sNoycsHRE5pugkubLS2VrM77+g/E=,tag:6JAsRjU0L6wbZtns3rk6KQ==,type:str]
sops_pgp__list_0__map_enc=-----BEGIN PGP MESSAGE-----\n\nhF4DAAAAAAAAAAASAQdAbA35718t0WVKrjQFYUPviCb0lVuh8NpfSdJCHjHcWWww\n8ak4q4VL69tZLSjQHx+VsMmKooknxWz6pw0lGxyDYlZMQ81bodInjaZGFZSz8Uuh\n0l4BhDCNDBBALTrnTliz6/DAHvmavI4UxMHost5alFio9JPkTDNmXZyvcy1/R6aw\n/uhQXLUBRvm0TSOhBZb7d0SLkLfe02Um40w1TibpKXsZz1GOMbPRNBMHHra0QIuQ\n=0jA+\n-----END PGP MESSAGE-----\n
sops_pgp__list_0__map_fp=31E70E5BC80C58AFF5DD649921AC5A1AC6E5B7F2
sops_lastmodified=2023-07-01T22:29:41Z
sops_unencrypted_suffix=_unencrypted
sops_version=3.7.3
sops_age__list_0__map_recipient=age1u57l4s400gqstc0p485j4646cemntufr0pcyp32yudklsp90xpmszxvnkj
sops_age__list_0__map_enc=-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBFSXFvLzFQaFJ0OVJKUFV5\nTWh2OUltUlpJWFlVVytFYU9VajBHSnQ4SGdjCnRVbEVXdDVyUHJrR05Ba0xvUm1l\nTkt2YmNUZy90ZFA2b3QrODFKZ01EVG8KLS0tIEw2dkd1cnFCbnI5eWxKL2o1aDVB\nN0hveXZ2dWdxQ2k2L0pGR0ROMStVTmsK4dV/hNyDjsYnVUiFQ7kqdmcVHfYyVckz\nh/rwLjcZgsup72WDVP3v6Eul8B3LKFrSb8CDFA54tyQmSdFDCQC+Zg==\n-----END AGE ENCRYPTED FILE-----\n
sops_mac=ENC[AES256_GCM,data:7/VMqNxRd0fmAUGHCBhQVpUTbNJPEoWdRd8gIHrH9Y9kTThuNy81g6S/OyFaoGIgKqz/anOa/nw24E0lBr3rMq1+pfKmlzIUCJVFem7Vc+vfIAUuugAmISbbMkxYYXfDJcndkW6MqAzNn3R1LGmp8XLcRzH70bj08235sW/68s4=,iv:lVBWwq43H5bNqmTd3+wCaz9URXDjCncUWYg2R4Noakc=,tag:NEQpa/zsQkX9YfW2FHAlbg==,type:str]
sops_pgp__list_0__map_enc=-----BEGIN PGP MESSAGE-----\n\nhF4DAAAAAAAAAAASAQdAbA35718t0WVKrjQFYUPviCb0lVuh8NpfSdJCHjHcWWww\n8ak4q4VL69tZLSjQHx+VsMmKooknxWz6pw0lGxyDYlZMQ81bodInjaZGFZSz8Uuh\n0l4BhDCNDBBALTrnTliz6/DAHvmavI4UxMHost5alFio9JPkTDNmXZyvcy1/R6aw\n/uhQXLUBRvm0TSOhBZb7d0SLkLfe02Um40w1TibpKXsZz1GOMbPRNBMHHra0QIuQ\n=0jA+\n-----END PGP MESSAGE-----\n
sops_unencrypted_suffix=_unencrypted
sops_mac=ENC[AES256_GCM,data:2v3JhTPcCrWBxIFWpVMzok8Vu9cpFWhqKM4WCrmdmGrTlJ+F3BsXymAgLQP+/RjPezV/ucig+RTa54ttsXRYflTt9iWC2Fmvaj2+SePfVxdLog19ToEopQyh1k4cHFWkmIOBNrnnBOqLFTfQZyR5ie7JUYqDlWrknBai7C1RbLw=,iv:xzKFOu7kJhcIOP9T/NMd3be7pQWEZJiHekrVOPFDas0=,tag:BH1zUwurdHVRAbcLK31I8w==,type:str]
sops_lastmodified=2023-07-06T16:24:15Z
sops_pgp__list_0__map_created_at=2023-06-01T18:01:04Z
sops_version=3.7.3
sops_pgp__list_0__map_fp=31E70E5BC80C58AFF5DD649921AC5A1AC6E5B7F2

View File

@@ -1 +1,2 @@
VERSION_ROOK=v1.11.9
VERSION_ROOK=v1.11.9
VERSION_CILIUM=1.14.0-rc.0

View File

@@ -8,7 +8,7 @@ resources:
- flux-install.yaml
- flux-repo.yaml
- ../../../repos/helm/app-template/
- ../../../deploy/core/_networking/cilium/
- ../../../deploy/core/_networking/cilium-nuclear/
- ../../../deploy/core/storage/_external-snapshotter/
- ../../../deploy/core/storage/rook-ceph/
- ../../../deploy/core/storage/rook-ceph/cluster/

View File

@@ -24,30 +24,42 @@ additionalMachineCertSans:
- "c.${DNS_CLUSTER}"
nodes:
- hostname: "blackfish.${DNS_CLUSTER}"
ipAddress: "${IP_K8S_BLACKFISH}"
ipAddress: "${IP_VLAN_BLACKFISH}1"
controlPlane: true
installDiskSelector:
size: "<= 600GB"
type: ssd
nodeLabels:
node-restriction.kubernetes.io/nodeType: main
nodes.home.arpa/type: stable
nameservers:
- "${IP_HOME_DNS}"
disableSearchDomain: true
networkInterfaces:
- interface: eth0
- interface: br0
mtu: 1500
dhcp: false
addresses:
- "${IP_K8S_BLACKFISH}/23"
routes:
- network: "${IP_ROUTER_LAN_CIDR}"
# metric: 1
- network: 0.0.0.0/0
gateway: "${IP_ROUTER_LAN}"
bridge:
stp:
enabled: false
interfaces:
- eth0
# - eth1
# - eth2
# deviceSelector:
# - hardwareAddr: "*:6a"
# driver: mlx4_en
vlans:
- vlanId: 883
mtu: 1500
dhcp: false
addresses: ["${IP_VLAN_BLACKFISH}1/30"]
routes:
- network: "${IP_VLAN_BLACKFISH}0/30"
- network: "0.0.0.0/0"
gateway: "${IP_VLAN_BLACKFISH}2"
# - deviceSelector: {driver: mlx4_en}
- interface: eth4
mtu: 9000
dhcp: false
patches:
# required for Talos to initialize i915 VFIO devices
- |-
@@ -55,10 +67,155 @@ nodes:
install:
extensions:
- image: ghcr.io/siderolabs/i915-ucode:20230310
# FRR routing
- |-
machine:
files:
- op: create
path: /var/etc/frr/frr.conf
permissions: 0o400
content: |
frr version 8.5.2_git
frr defaults traditional
hostname blackfish
log stdout
ip forwarding
no ipv6 forwarding
service integrated-vtysh-config
!
interface lo
ip address ${IP_K8S_BLACKFISH}/32
ip router openfabric 69
openfabric passive
ip ospf area ${IP_K8S_BLACKFISH}
exit
!
interface br0.883
ip ospf area ${IP_K8S_BLACKFISH}
ip ospf network broadcast
exit
!
interface eth4
ip router openfabric 69
openfabric csnp-interval 2
openfabric hello-interval 1
openfabric hello-multiplier 2
exit
!
router openfabric 69
net 69.6969.3333.3333.3333.00
lsp-gen-interval 1
max-lsp-lifetime 600
lsp-refresh-interval 180
exit
!
router ospf
ospf router-id ${IP_K8S_BLACKFISH}
log-adjacency-changes
exit
!
- hostname: "humming.${DNS_CLUSTER}"
ipAddress: "${IP_VLAN_HUMMING}1"
controlPlane: false
installDiskSelector:
size: "<= 600GB"
type: nvme
nameservers:
- "${IP_HOME_DNS}"
disableSearchDomain: true
networkInterfaces:
- interface: br0
mtu: 1500
dhcp: false
bridge:
stp:
enabled: false
interfaces:
- eth0
# - eth1
# - eth2
vlans:
- vlanId: 882
mtu: 1500
dhcp: false
addresses: ["${IP_VLAN_HUMMING}1/30"]
routes:
- network: "${IP_VLAN_HUMMING}0/30"
- network: "0.0.0.0/0"
gateway: "${IP_VLAN_HUMMING}2"
# - deviceSelector: {driver: mlx4_en}
- interface: eth1
mtu: 9000
dhcp: false
- interface: eth2
mtu: 9000
dhcp: false
patches:
# required for Talos to initialize i915 VFIO devices
- |-
machine:
install:
extensions:
- image: ghcr.io/siderolabs/i915-ucode:20230310
# FRR routing
- |-
machine:
files:
- op: create
path: /var/etc/frr/frr.conf
permissions: 0o400
content: |
frr version 8.5.2_git
frr defaults traditional
hostname humming
log stdout
ip forwarding
no ipv6 forwarding
service integrated-vtysh-config
!
interface lo
ip address ${IP_K8S_HUMMING}/32
ip router openfabric 69
openfabric passive
ip ospf area ${IP_K8S_HUMMING}
exit
!
interface br0.882
ip ospf area ${IP_K8S_HUMMING}
ip ospf network broadcast
exit
!
interface eth1
ip router openfabric 69
openfabric csnp-interval 2
openfabric hello-interval 1
openfabric hello-multiplier 2
exit
!
interface eth2
ip router openfabric 69
openfabric csnp-interval 2
openfabric hello-interval 1
openfabric hello-multiplier 2
exit
!
router openfabric 69
net 69.6969.2222.2222.2222.00
lsp-gen-interval 1
max-lsp-lifetime 600
lsp-refresh-interval 180
exit
!
router ospf
ospf router-id ${IP_K8S_HUMMING}
log-adjacency-changes
exit
!
controlPlane:
patches:
- |-
- &kubeletExtraArgs |-
- op: add
path: /machine/kubelet/extraArgs
value:
@@ -73,11 +230,11 @@ controlPlane:
# path: /cluster/controllerManager/extraArgs
# value:
# node-cidr-mask-size: 22
# - |-
# - &wipeWithZeros |-
# machine:
# install:
# wipe: true
- |-
- &machinePatch |-
machine:
network:
extraHostEntries:
@@ -89,12 +246,76 @@ controlPlane:
servers:
- "${IP_ROUTER_LAN}"
bootTimeout: 2m0s
- &kubeletNodeIP |-
machine:
kubelet:
nodeIP:
validSubnets:
- "${IP_ROUTER_LAN_CIDR}"
- "${IP_ROUTER_VLAN_K8S_CIDR}"
- |-
cluster:
etcd:
advertisedSubnets:
- "${IP_ROUTER_VLAN_K8S_CIDR}"
# FRR routing
- &frr-pod |-
machine:
kubelet:
extraMounts:
- source: /var/etc/frr
destination: /var/etc/frr
type: bind
options: ["bind", "rshared", "ro"]
files:
- op: create
path: /var/etc/frr/daemons
permissions: 0o400
content: |
fabricd=yes
bgpd=no
ospfd=yes
ospf6d=no
vtysh_enable=yes
- op: create
path: /var/etc/frr/vtysh.conf
permissions: 0o400
content: |
service integrated-vtysh-config
pods:
- apiVersion: v1
kind: Pod
metadata:
name: &name "frr-host"
namespace: "kube-system"
spec:
hostNetwork: true
containers:
- name: *name
image: "quay.io/frrouting/frr:8.5.2@sha256:d4cb742ae97e43a4ea7cad3fc1e9663365761792d31213f3752668d05cab3e1c"
command: ["/usr/lib/frr/docker-start"]
securityContext:
privileged: true
resources:
requests:
cpu: "100m"
memory: "100Mi"
limits:
cpu: "500m"
memory: "350Mi"
volumeMounts:
- name: frr-config
mountPath: /etc/frr
volumes:
- name: frr-config
hostPath:
path: /var/etc/frr
type: Directory
readOnly: true
- &clusterPatch |-
cluster:
allowSchedulingOnMasters: true
discovery:
@@ -106,45 +327,33 @@ controlPlane:
disabled: true
proxy:
disabled: true
etcd:
advertisedSubnets:
- "${IP_ROUTER_LAN_CIDR}"
# Rook Ceph encrypted OSDs
# TODO: https://github.com/siderolabs/talos/issues/3129
- &encryptedOSD |-
machine:
files:
- op: overwrite
path: /etc/lvm/lvm.conf
permissions: 0o644
content: |
backup {
backup = 0
archive = 0
}
worker:
patches:
- |-
- op: add
path: /machine/kubelet/extraArgs
value:
feature-gates: CronJobTimeZone=true,GracefulNodeShutdown=true,MixedProtocolLBService=true,EphemeralContainers=true,ServerSideApply=true
# - |-
# - op: add
# path: /cluster/controllerManager/extraArgs
# value:
# node-cidr-mask-size: 22
- |-
machine:
install:
wipe: true
network:
extraHostEntries:
- ip: "${IP_K8S_BLACKFISH}"
aliases:
- "c.${DNS_CLUSTER}"
time:
disabled: false
servers:
- "${IP_ROUTER_LAN}"
bootTimeout: 2m0s
- |-
cluster:
allowSchedulingOnMasters: true
discovery:
enabled: true
registries:
kubernetes:
disabled: false
service:
disabled: true
proxy:
disabled: true
- *kubeletExtraArgs
- *machinePatch
- *clusterPatch
- *kubeletNodeIP
- *frr-pod
# Rook Ceph encrypted OSDs
# TODO: https://github.com/siderolabs/talos/issues/3129
- *encryptedOSD
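The patch list above leans on standard YAML anchors and aliases: each multi-line patch is defined once under `controlPlane.patches` with `&name` and reused verbatim under `worker.patches` with `*name`. A minimal sketch of the pattern (patch content illustrative):

```yaml
controlPlane:
  patches:
    - &machinePatch |-   # anchor the patch document once
      machine:
        network:
          extraHostEntries: []
worker:
  patches:
    - *machinePatch      # alias re-emits the identical patch for workers
```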

View File

@@ -0,0 +1,2 @@
Generate Cilium install YAML that is Flux HelmRelease compatible:
`kustomize build . --enable-helm > ./cilium.yaml`

View File

@@ -0,0 +1,85 @@
---
## NOTE: required for Talos
securityContext:
privileged: true
capabilities:
ciliumAgent: "{CHOWN,KILL,NET_ADMIN,NET_RAW,IPC_LOCK,SYS_ADMIN,SYS_RESOURCE,DAC_OVERRIDE,FOWNER,SETGID,SETUID}"
cleanCiliumState: "{NET_ADMIN,SYS_ADMIN,SYS_RESOURCE}"
cgroup:
autoMount:
enabled: false
hostRoot: "/sys/fs/cgroup"
## NOTE: Cluster identification, mainly for ClusterMesh
cluster:
name: "${CLUSTER_NAME}"
id: ${CONFIG_CILIUM_CLUSTER_ID}
## NOTE: inter-node pod networking configuration
### for native routing (all hosts in the same VLAN or L2 domain), highest performance
# tunnel: disabled
# autoDirectNodeRoutes: true
# ipv4NativeRoutingCIDR: "${IP_POD_CIDR_V4}"
### nodes are L3-only routed (different VLANs or L2 domains, routed via OpenFabric/OSPF/BGP/etc., or nodes behind WireGuard/VPN)
### 2023-07-06: currently set up for a 3-node 10G star network, with a 1G link to the upstream router & internet
MTU: 9000
#### Cilium <1.14
# tunnel: geneve
# loadBalancer:
# algorithm: maglev
# mode: snat
#### Cilium 1.14+
tunnelProtocol: geneve
routingMode: tunnel
loadBalancer:
algorithm: maglev
mode: dsr
dsrDispatch: geneve
## NOTE: Cilium's networking internals
ipam:
mode: kubernetes
kubeProxyReplacement: strict
k8sServiceHost: ${IP_CLUSTER_VIP}
k8sServicePort: 6443
kubeProxyReplacementHealthzBindAddr: 0.0.0.0:10256
## NOTE: pods handling, e.g. upon ConfigMap update
rollOutCiliumPods: true
operator:
rollOutPods: true
## NOTE: Cilium additional features and/or CRDs
bpf:
masquerade: true
tproxy: true # L7 netpols stuff
l7Proxy: true # enables L7 netpols
bgpControlPlane:
enabled: true
### `bgpControlPlane.enabled: true` enables the newer GoBGP implementation, while `bgp.enabled: true` and `bgp.announce` use the older MetalLB BGP implementation, planned for deprecation in Cilium v1.15.
### The `bgp.announce` block is replaced by the CiliumBGPPeeringPolicy CRD used by bgpControlPlane, for more fine-grained control over announced addresses.
localRedirectPolicy: true
nodePort:
enabled: true
range: 9993,32767
bandwidthManager:
enabled: true
bbr: false # enable after Talos kernel updated to >= 5.18
enableIPv6BIGTCP: false # enable after Talos kernel updated to >= 5.19
### with the below enabled, `kubectl get` and `kubectl describe` will reflect CiliumNetworkPolicy & CiliumEndpoint status (policy enforcement, etc.)
enableCnpStatusUpdates: true
endpointStatus:
enabled: true
status: "policy"
## NOTE: Hubble observability
hubble:
enabled: true
peerService:
clusterDomain: cluster.local
relay:
enabled: true
rollOutPods: true
ui:
enabled: true
rollOutPods: true

View File

@@ -0,0 +1,6 @@
#!/bin/bash
## one of these days, I'll learn and switch to Taskfiles
set -euo pipefail
GITROOT=$(git rev-parse --show-toplevel)
source <(sops -d "$1" | yq .data | sed -re 's/^/export /g' | sed -e 's/: /="/g' | sed -re 's/$/"/g')
kustomize build "$2" --enable-helm | envsubst

View File

@@ -0,0 +1,28 @@
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
helmCharts:
- name: cilium
repo: https://helm.cilium.io/
version: 1.14.0-rc.0
# version: 1.13.4
releaseName: cilium
namespace: kube-system
valuesFile: base-values.yaml
valuesMerge: override
valuesInline:
hubble:
enabled: false
relay:
enabled: false
ui:
enabled: false
tls:
enabled: false
auto:
enabled: false
commonAnnotations:
meta.helm.sh/release-name: cilium
meta.helm.sh/release-namespace: kube-system
commonLabels:
app.kubernetes.io/managed-by: Helm

View File

@@ -0,0 +1,46 @@
---
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
name: cilium
namespace: kube-system
annotations:
meta.helm.sh/release-name: cilium
meta.helm.sh/release-namespace: kube-system
labels:
app.kubernetes.io/managed-by: Helm
spec:
interval: 5m
chart:
spec:
chart: cilium
version: "1.14.0-rc.0"
sourceRef:
name: cilium-charts
kind: HelmRepository
namespace: flux-system
valuesFrom:
- kind: ConfigMap
name: cilium-base-install-values
values:
## NOTE: BGP for LoadBalancer services
### `bgpControlPlane.enabled: true` enables the newer GoBGP implementation, while `bgp.enabled: true` and `bgp.announce` use the older MetalLB BGP implementation, planned for deprecation in Cilium v1.15.
### The `bgp.announce` block is replaced by the CiliumBGPPeeringPolicy CRD used by bgpControlPlane, for more fine-grained control over announced addresses.
# bgp:
# enabled: true
# announce:
# loadbalancerIP: true
# podCIDR: true
bgpControlPlane:
enabled: true
## NOTE: Hubble observability
hubble:
ui:
ingress:
enabled: true
className: "nginx"
hosts:
- "${APP_DNS_HUBBLE}"
tls:
- hosts:
- "${APP_DNS_HUBBLE}"

View File

@@ -0,0 +1,11 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- hr.yaml
configMapGenerator:
- name: "cilium-base-install-values"
namespace: kube-system
files:
- values.yaml=bootstrap-install/base-values.yaml
configurations:
- kustomizeconfig.yaml

View File

@@ -0,0 +1,6 @@
nameReference:
- kind: ConfigMap
version: v1
fieldSpecs:
- path: spec/valuesFrom/name
kind: HelmRelease
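The `nameReference` entry above is needed because `configMapGenerator` appends a content hash to the generated ConfigMap name; this configuration lets kustomize rewrite `spec/valuesFrom/name` inside the HelmRelease to the hashed name. A sketch of the effect (hash suffix illustrative):

```yaml
# ConfigMap emitted by configMapGenerator (suffix changes with content):
metadata:
  name: cilium-base-install-values-7f8g9h2k4m
---
# HelmRelease after kustomize applies the nameReference above:
spec:
  valuesFrom:
    - kind: ConfigMap
      name: cilium-base-install-values-7f8g9h2k4m
```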

View File

@@ -0,0 +1,50 @@
---
apiVersion: cilium.io/v2alpha1
kind: CiliumBGPPeeringPolicy
# MAKE SURE CRDs ARE INSTALLED IN CLUSTER VIA cilium-config ConfigMap OR Cilium HelmRelease/values.yaml (bgpControlPlane.enabled: true), BEFORE THIS IS APPLIED!
# "CiliumBGPPeeringPolicy" Custom Resource will replace the old MetalLB BGP's "bgp-config" ConfigMap
# "CiliumBGPPeeringPolicy" is used with `bgpControlPlane.enabled: true` which uses GoBGP, NOT the old `bgp.enabled: true` which uses MetalLB
metadata:
name: bgp-loadbalancer-ip-main
spec:
nodeSelector:
matchLabels:
kubernetes.io/os: "linux" # match all Linux nodes, change this to match more granularly if more than 1 PeeringPolicy is to be used throughout cluster
virtualRouters:
- localASN: ${ASN_ROUTER} # ASNs are processed in uint32
exportPodCIDR: false
serviceSelector: # replaces address-pools: instead of defining the range of IPs assignable to LoadBalancer Services, Services must now match the selectors below for their LB IPs to be announced
matchExpressions:
- {key: thisFakeSelector, operator: NotIn, values: ['will-match-and-announce-all-services']}
neighbors:
- peerAddress: "${IP_ROUTER_VLAN_K8S}/32" # unlike bgp-config ConfigMap, peerAddress needs to be in CIDR notation
peerASN: ${ASN_ROUTER}
- localASN: ${ASN_EC2_INGRESS}
exportPodCIDR: false
serviceSelector:
matchExpressions:
- {key: thisFakeSelector, operator: NotIn, values: ['will-match-and-announce-all-services']}
neighbors:
- peerAddress: "${IP_EC2_NON_K8S}/32"
peerASN: ${ASN_EC2_INGRESS}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: bgp-config # this "bgp-config" ConfigMap is used by the old `bgp.enabled: true` (MetalLB BGP), which will be deprecated in future releases
namespace: kube-system
data:
config.yaml: |
peers:
- peer-address: "${IP_ROUTER_VLAN_K8S}"
peer-asn: ${ASN_ROUTER}
my-asn: ${ASN_ROUTER}
- peer-address: "${IP_EC2_NON_K8S}"
peer-asn: ${ASN_EC2_INGRESS}
my-asn: ${ASN_EC2_INGRESS}
address-pools:
- name: main-addr-pool
protocol: bgp
avoid-buggy-ips: true
addresses:
- "${IP_LB_CIDR}"

View File

@@ -0,0 +1,19 @@
---
apiVersion: "cilium.io/v2alpha1"
kind: CiliumLoadBalancerIPPool
metadata:
name: main-pool
spec:
cidrs:
- cidr: "${IP_LB_CIDR}"
---
apiVersion: "cilium.io/v2alpha1"
kind: CiliumLoadBalancerIPPool
metadata:
name: dns
spec:
cidrs:
- cidr: "${IP_LB_DNS_CIDR}"
serviceSelector:
matchLabels:
exposeSvc: dns
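With LB IPAM, the `dns` pool above only hands out addresses from `${IP_LB_DNS_CIDR}` to Services whose labels match its `serviceSelector`; a minimal sketch of such a Service (name, namespace, and backend are illustrative):

```yaml
apiVersion: v1
kind: Service
metadata:
  name: home-dns              # illustrative
  namespace: dns              # illustrative
  labels:
    exposeSvc: dns            # matched by the CiliumLoadBalancerIPPool serviceSelector above
spec:
  type: LoadBalancer
  selector:
    app.kubernetes.io/name: blocky   # illustrative backend pods
  ports:
    - name: dns-udp
      port: 53
      protocol: UDP
      targetPort: 53
```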

View File

@@ -0,0 +1,11 @@
---
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: 1-core-1-networking-cilium-app
namespace: flux-system
labels:
kustomization.flux.home.arpa/name: "cilium"
spec:
path: ./kube/deploy/core/_networking/cilium-nuclear/app
dependsOn: []

View File

@@ -0,0 +1,6 @@
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- repo.yaml
- ks.yaml

View File

@@ -0,0 +1,38 @@
---
apiVersion: "cilium.io/v2"
kind: CiliumClusterwideNetworkPolicy
metadata:
name: "cluster-default-kube-dns-ingress"
spec:
description: "Policy for ingress allow to kube-dns from all Cilium managed endpoints in the cluster"
endpointSelector:
matchLabels:
k8s:io.kubernetes.pod.namespace: kube-system
k8s-app: kube-dns
ingress:
- fromEndpoints:
- {}
toPorts:
- ports:
- port: "53"
---
apiVersion: "cilium.io/v2"
kind: CiliumClusterwideNetworkPolicy
metadata:
name: "cluster-default-kube-dns-egress"
spec:
description: "Policy for egress allow to kube-dns from all Cilium managed endpoints in the cluster"
endpointSelector: {}
egress:
- toEndpoints:
- matchLabels:
io.kubernetes.pod.namespace: kube-system
k8s-app: kube-dns
toPorts:
- ports:
- port: "53"
# TODO: broken on <1.13.3 and >1.14.0-snapshot.1 to 1.14.0-rc.0 with 10G L3-routed podCIDR interface and 1G L2 default route interface
# TODO: which also means FQDN egress netpols are broken, issue to be opened (tested 2023-07-06)
# rules:
# dns:
# - {}

View File

@@ -0,0 +1,35 @@
---
apiVersion: cilium.io/v2
kind: CiliumNetworkPolicy
metadata:
name: &app flux-system
namespace: *app
spec:
endpointSelector: {}
ingress:
- fromEndpoints:
- {}
egress:
# same namespace
- toEndpoints:
- matchLabels:
io.kubernetes.pod.namespace: *app
# allow all to public Internet
- toEntities:
- world
# TODO: confirm if these are the only ports Flux uses, as well as what FQDNs Flux talks to (GitHub only? then why 443?)
- toEntities:
- world
toPorts:
- ports:
- port: "22"
- ports:
- port: "443"
# k8s apiserver
- toEndpoints:
- matchLabels:
io.kubernetes.pod.namespace: default
component: apiserver
provider: kubernetes
- toEntities:
- kube-apiserver

View File

@@ -0,0 +1,22 @@
---
apiVersion: cilium.io/v2
kind: CiliumNetworkPolicy
metadata:
name: kube-system-allow-all
namespace: kube-system
spec:
endpointSelector: {}
ingress:
- fromEntities:
- cluster
- fromEndpoints:
- {}
- fromEntities:
- world
egress:
- toEntities:
- world
- toEndpoints:
- {}
- toEntities:
- cluster

View File

@@ -0,0 +1,72 @@
---
apiVersion: cilium.io/v2
kind: CiliumNetworkPolicy
metadata:
name: apiserver
namespace: default
spec:
endpointSelector:
matchLabels:
component: apiserver
provider: kubernetes
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: labelled-allow-egress-apiserver
spec:
endpointSelector:
matchLabels:
egress.home.arpa/apiserver: allow
egress:
- toEntities:
- kube-apiserver
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: labelled-allow-egress-world
spec:
endpointSelector:
matchLabels:
egress.home.arpa/world: allow
egress:
- toEntities:
- world
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: labelled-allow-egress-world-https
spec:
endpointSelector:
matchLabels:
egress.home.arpa/world-https: allow
egress:
- toEntities:
- world
toPorts:
- ports:
- port: "443"
protocol: TCP
- port: "443"
protocol: UDP
---
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: labelled-allow-egress-home-dns
spec:
endpointSelector:
matchLabels:
egress.home.arpa/home-dns: allow
egress:
- toCIDRSet:
- cidr: "${IP_HOME_DNS}/32"
toPorts:
- ports:
- port: "53"
protocol: ANY
rules:
dns:
- matchPattern: "*"
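Workloads opt into these clusterwide egress allowances by carrying the matching pod labels; a minimal sketch of a Deployment pod template using two of them (all names and the image are illustrative):

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: example-app                # illustrative
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: example-app
  template:
    metadata:
      labels:
        app.kubernetes.io/name: example-app
        egress.home.arpa/world-https: "allow"   # matched by labelled-allow-egress-world-https
        egress.home.arpa/home-dns: "allow"      # matched by labelled-allow-egress-home-dns
    spec:
      containers:
        - name: app
          image: ghcr.io/example/app:latest     # illustrative
```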

View File

@@ -0,0 +1,10 @@
---
apiVersion: source.toolkit.fluxcd.io/v1beta1
kind: HelmRepository
metadata:
name: cilium-charts
namespace: flux-system
spec:
interval: 10m0s
timeout: 3m0s
url: https://helm.cilium.io/