Add VRF device to support egress gateways (#62)

* Add VRF device support

* cluster-template-cilium-load-balancer: metallb based loadbalancer nodes

* LoadBalancerNodes: Documentation

* load-balancer-nodes: taint and toleration for metallb

* cluster-template-cilium-load-balancer: force ipvs/strictARP for metallb
This commit is contained in:
Felix Wischke
2024-02-13 13:53:32 +01:00
committed by GitHub
parent 8bc8ff98f9
commit de5099dfe1
16 changed files with 1774 additions and 25 deletions

View File

@@ -227,6 +227,18 @@ CALICO_VERSION ?= v3.26.3
crs-calico: ## Generates crs manifests for Calico.
curl -o templates/crs/cni/calico.yaml https://raw.githubusercontent.com/projectcalico/calico/$(CALICO_VERSION)/manifests/calico.yaml
METALLB_VERSION ?= 0.14.3
FRR_K8S_DIR = metallb/charts/metallb/charts/frr-k8s/templates
METALLB_TOLERATIONS = [{"key": "node-role.kubernetes.io/load-balancer", "operator": "Exists", "effect": "NoSchedule"}]
.PHONY: crs-metallb
crs-metallb: ## Generates crs manifests for MetalLB.
$(HELM) repo add metallb https://metallb.github.io/metallb
$(HELM) template metallb metallb/metallb --version $(METALLB_VERSION) --set frrk8s.enabled=true,speaker.frr.enabled=false --set-json 'controller.tolerations=$(METALLB_TOLERATIONS)' --set-json 'speaker.tolerations=$(METALLB_TOLERATIONS)' --set-json 'frr-k8s.frrk8s.tolerations=$(METALLB_TOLERATIONS)' --namespace=metallb-system > templates/crs/metallb.yaml
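@# Fix up namespacing in frr-k8s to work with ClusterResourceSets. The sed line addresses below are tied to the manifest rendered for METALLB_VERSION and must be re-checked whenever that version changes.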
@sed -e '7bp;48bp;69bp;1682bp;1854bp;1887bp;2253bp;bn' -e ':p i\ namespace: "metallb-system"' -e ':n' -i templates/crs/metallb.yaml
##@ Release
## --------------------------------------
## Release

View File

@@ -203,6 +203,110 @@ type NetworkSpec struct {
// +listType=map
// +listMapKey=name
AdditionalDevices []AdditionalNetworkDevice `json:"additionalDevices,omitempty"`
// VirtualNetworkDevices defines virtual network devices (e.g. bridges, vlans ...).
VirtualNetworkDevices `json:",inline"`
}
// InterfaceConfig contains all configurables a network interface can have.
type InterfaceConfig struct {
// IPv4PoolRef is a reference to an IPAM Pool resource, which exposes IPv4 addresses.
// The network device will use an available IP address from the referenced pool.
// This can be combined with `IPv6PoolRef` in order to enable dual stack.
// +optional
// +kubebuilder:validation:XValidation:rule="self.apiGroup == 'ipam.cluster.x-k8s.io'",message="ipv4PoolRef allows only IPAM apiGroup ipam.cluster.x-k8s.io"
// +kubebuilder:validation:XValidation:rule="self.kind == 'InClusterIPPool' || self.kind == 'GlobalInClusterIPPool'",message="ipv4PoolRef allows either InClusterIPPool or GlobalInClusterIPPool"
IPv4PoolRef *corev1.TypedLocalObjectReference `json:"ipv4PoolRef,omitempty"`
// IPv6PoolRef is a reference to an IPAM Pool resource, which exposes IPv6 addresses.
// The network device will use an available IP address from the referenced pool.
// This can be combined with `IPv4PoolRef` in order to enable dual stack.
// +optional
// +kubebuilder:validation:XValidation:rule="self.apiGroup == 'ipam.cluster.x-k8s.io'",message="ipv6PoolRef allows only IPAM apiGroup ipam.cluster.x-k8s.io"
// +kubebuilder:validation:XValidation:rule="self.kind == 'InClusterIPPool' || self.kind == 'GlobalInClusterIPPool'",message="ipv6PoolRef allows either InClusterIPPool or GlobalInClusterIPPool"
IPv6PoolRef *corev1.TypedLocalObjectReference `json:"ipv6PoolRef,omitempty"`
// DNSServers contains information about nameservers to be used for this interface.
// If this field is not set, it will use the default DNS servers from the ProxmoxCluster.
// When set, the list must contain at least one entry.
// +optional
// +kubebuilder:validation:MinItems=1
DNSServers []string `json:"dnsServers,omitempty"`
}
// RouteSpec describes an IPv4/IPv6 Route.
type RouteSpec struct {
// To is the destination subnet to be routed.
// +optional
To string `json:"to,omitempty"`
// Via is the gateway used to reach the subnet.
// +optional
Via string `json:"via,omitempty"`
// Metric is the priority of the route in the routing table.
// +optional
Metric uint32 `json:"metric,omitempty"`
// Table is the ID of the routing table used for this route.
// +optional
Table uint32 `json:"table,omitempty"`
}
// RoutingPolicySpec is a Linux FIB rule.
type RoutingPolicySpec struct {
// To is the subnet of the target.
// +optional
To string `json:"to,omitempty"`
// From is the subnet of the source.
// +optional
From string `json:"from,omitempty"`
// Table is the routing table ID.
// +optional
Table uint32 `json:"table,omitempty"`
// Priority is the position in the ip rule FIB table.
// When set, the value must not be 0, 32765 or 32766.
// +kubebuilder:validation:Maximum=4294967295
// +kubebuilder:validation:XValidation:message="Cowardly refusing to insert fib rule matching kernel rules",rule="(self > 0 && self < 32765) || (self > 32766)"
// +optional
Priority uint32 `json:"priority,omitempty"`
}
// VRFDevice defines Virtual Routing and Forwarding (VRF) devices.
type VRFDevice struct {
// Interfaces is the list of proxmox network devices managed by this virtual device.
Interfaces []string `json:"interfaces,omitempty"`
// Name is the virtual network device name.
// Must be unique within the virtual machine.
// +kubebuilder:validation:MinLength=3
Name string `json:"name"`
// Table is the ID of the routing table used for the l3mdev vrf device.
// The value must not be 0, 254 or 255.
// +kubebuilder:validation:Maximum=4294967295
// +kubebuilder:validation:XValidation:message="Cowardly refusing to insert l3mdev rules into kernel tables",rule="(self > 0 && self < 254) || (self > 255)"
Table uint32 `json:"table"`
// InterfaceConfig contains all configurables a network interface can have.
// +optional
InterfaceConfig `json:",inline"`
// Routes are the routes associated with the l3mdev policy.
// +optional
// +kubebuilder:validation:MinItems=1
Routes []RouteSpec `json:"routes,omitempty"`
// RoutingPolicy is the l3mdev policy inserted into the FIB.
// +optional
// +kubebuilder:validation:MinItems=1
RoutingPolicy []RoutingPolicySpec `json:"routingPolicy,omitempty"`
}
// VirtualNetworkDevices defines Linux software networking devices.
type VirtualNetworkDevices struct {
// VRFs defines the VRF devices. List entries are keyed by name.
// +optional
// +listType=map
// +listMapKey=name
VRFs []VRFDevice `json:"vrfs,omitempty"`
}
// NetworkDevice defines the required details of a virtual machine network device.
@@ -262,7 +366,7 @@ type AdditionalNetworkDevice struct {
// ProxmoxMachineStatus defines the observed state of ProxmoxMachine.
type ProxmoxMachineStatus struct {
// Ready indicates the Docker infrastructure has been provisioned and is ready
// Ready indicates the Docker infrastructure has been provisioned and is ready.
// +optional
Ready bool `json:"ready"`
@@ -282,13 +386,13 @@ type ProxmoxMachineStatus struct {
// +optional
IPAddresses map[string]IPAddress `json:"ipAddresses,omitempty"`
// Network returns the network status for each of the machine's configured
// Network returns the network status for each of the machine's configured
// network interfaces.
// +optional
Network []NetworkStatus `json:"network,omitempty"`
// ProxmoxNode is the name of the proxmox node, which was chosen for this
// machine to be deployed on
// machine to be deployed on.
// +optional
ProxmoxNode *string `json:"proxmoxNode,omitempty"`
@@ -298,7 +402,7 @@ type ProxmoxMachineStatus struct {
// +optional
TaskRef *string `json:"taskRef,omitempty"`
// RetryAfter tracks the time we can retry queueing a task
// RetryAfter tracks the time we can retry queueing a task.
// +optional
RetryAfter metav1.Time `json:"retryAfter,omitempty"`

View File

@@ -224,5 +224,36 @@ var _ = Describe("ProxmoxMachine Test", func() {
Expect(k8sClient.Create(context.Background(), dm)).Should(MatchError(ContainSubstring("should be less than or equal to 65520")))
})
// Creating a machine whose VRF uses routing table 254 must fail: the
// validation rule on VRFDevice.Table only admits values in 1..253 or above 255.
It("Should only allow VRFS with a non kernel routing table ", func() {
dm := defaultMachine()
dm.Spec.Network = &NetworkSpec{
VirtualNetworkDevices: VirtualNetworkDevices{
VRFs: []VRFDevice{{
Name: "vrf-blue",
Table: 254,
}},
},
}
Expect(k8sClient.Create(context.Background(), dm)).Should(MatchError(ContainSubstring("Cowardly refusing to insert l3mdev rules into kernel tables")))
})
// Creating a machine whose routing policy uses priority 32766 must fail: the
// validation rule on RoutingPolicySpec.Priority rejects that value. The VRF
// table (100) is valid, so only the priority rule can trigger the error.
// NOTE(review): the rule rejects 32765 and 32766 but accepts 32767; if the
// intent is to protect the kernel's default rules, confirm the bounds are not
// off by one.
It("Should only allow non kernel FIB rule priority", func() {
dm := defaultMachine()
dm.Spec.Network = &NetworkSpec{
VirtualNetworkDevices: VirtualNetworkDevices{
VRFs: []VRFDevice{{
Name: "vrf-blue",
Table: 100,
RoutingPolicy: []RoutingPolicySpec{{
Priority: 32766,
}},
}},
},
}
Expect(k8sClient.Create(context.Background(), dm)).Should(MatchError(ContainSubstring("Cowardly refusing to insert fib rule matching kernel rules")))
})
})
})

View File

@@ -74,6 +74,36 @@ func (in *IPAddress) DeepCopy() *IPAddress {
return out
}
// DeepCopyInto writes a deep copy of the receiver into out. Both pool
// references and the DNS server slice are re-allocated so that out shares no
// memory with in. in must be non-nil.
func (in *InterfaceConfig) DeepCopyInto(out *InterfaceConfig) {
	*out = *in
	if src := in.IPv4PoolRef; src != nil {
		out.IPv4PoolRef = new(v1.TypedLocalObjectReference)
		src.DeepCopyInto(out.IPv4PoolRef)
	}
	if src := in.IPv6PoolRef; src != nil {
		out.IPv6PoolRef = new(v1.TypedLocalObjectReference)
		src.DeepCopyInto(out.IPv6PoolRef)
	}
	if src := in.DNSServers; src != nil {
		out.DNSServers = make([]string, len(src))
		copy(out.DNSServers, src)
	}
}
// DeepCopy returns a newly allocated deep copy of the receiver, or nil when
// the receiver itself is nil.
func (in *InterfaceConfig) DeepCopy() *InterfaceConfig {
	if in == nil {
		return nil
	}
	var clone InterfaceConfig
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *NetworkDevice) DeepCopyInto(out *NetworkDevice) {
*out = *in
@@ -114,6 +144,7 @@ func (in *NetworkSpec) DeepCopyInto(out *NetworkSpec) {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
in.VirtualNetworkDevices.DeepCopyInto(&out.VirtualNetworkDevices)
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NetworkSpec.
@@ -572,6 +603,36 @@ func (in *ProxmoxMachineTemplateSpec) DeepCopy() *ProxmoxMachineTemplateSpec {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// A plain value assignment is the whole copy here.
func (in *RouteSpec) DeepCopyInto(out *RouteSpec) {
*out = *in
}
// DeepCopy returns a newly allocated deep copy of the receiver, or nil when
// the receiver itself is nil.
func (in *RouteSpec) DeepCopy() *RouteSpec {
	if in == nil {
		return nil
	}
	var clone RouteSpec
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// A plain value assignment is the whole copy here.
func (in *RoutingPolicySpec) DeepCopyInto(out *RoutingPolicySpec) {
*out = *in
}
// DeepCopy returns a newly allocated deep copy of the receiver, or nil when
// the receiver itself is nil.
func (in *RoutingPolicySpec) DeepCopy() *RoutingPolicySpec {
	if in == nil {
		return nil
	}
	var clone RoutingPolicySpec
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SchedulerHints) DeepCopyInto(out *SchedulerHints) {
*out = *in
@@ -612,6 +673,37 @@ func (in *Storage) DeepCopy() *Storage {
return out
}
// DeepCopyInto writes a deep copy of the receiver into out. The interface
// list, the embedded InterfaceConfig, the routes and the routing policy are
// all duplicated so that out shares no memory with in. in must be non-nil.
func (in *VRFDevice) DeepCopyInto(out *VRFDevice) {
	*out = *in
	if src := in.Interfaces; src != nil {
		out.Interfaces = make([]string, len(src))
		copy(out.Interfaces, src)
	}
	in.InterfaceConfig.DeepCopyInto(&out.InterfaceConfig)
	if src := in.Routes; src != nil {
		out.Routes = make([]RouteSpec, len(src))
		copy(out.Routes, src)
	}
	if src := in.RoutingPolicy; src != nil {
		out.RoutingPolicy = make([]RoutingPolicySpec, len(src))
		copy(out.RoutingPolicy, src)
	}
}
// DeepCopy returns a newly allocated deep copy of the receiver, or nil when
// the receiver itself is nil.
func (in *VRFDevice) DeepCopy() *VRFDevice {
	if in == nil {
		return nil
	}
	var clone VRFDevice
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *VirtualMachine) DeepCopyInto(out *VirtualMachine) {
*out = *in
@@ -688,3 +780,25 @@ func (in *VirtualMachineCloneSpec) DeepCopy() *VirtualMachineCloneSpec {
in.DeepCopyInto(out)
return out
}
// DeepCopyInto writes a deep copy of the receiver into out, deep-copying every
// VRF device element by element. in must be non-nil.
func (in *VirtualNetworkDevices) DeepCopyInto(out *VirtualNetworkDevices) {
	*out = *in
	if in.VRFs == nil {
		return
	}
	out.VRFs = make([]VRFDevice, len(in.VRFs))
	for idx := range in.VRFs {
		in.VRFs[idx].DeepCopyInto(&out.VRFs[idx])
	}
}
// DeepCopy returns a newly allocated deep copy of the receiver, or nil when
// the receiver itself is nil.
func (in *VirtualNetworkDevices) DeepCopy() *VirtualNetworkDevices {
	if in == nil {
		return nil
	}
	var clone VirtualNetworkDevices
	in.DeepCopyInto(&clone)
	return &clone
}

View File

@@ -262,6 +262,162 @@ spec:
required:
- bridge
type: object
vrfs:
description: Definition of a Vrf Device.
items:
description: VRFDevice defines Virtual Routing Flow devices.
properties:
dnsServers:
description: DNSServers contains information about nameservers
to be used for this interface. If this field is not set,
it will use the default dns servers from the ProxmoxCluster.
items:
type: string
minItems: 1
type: array
interfaces:
description: Interfaces is the list of proxmox network devices
managed by this virtual device.
items:
type: string
type: array
ipv4PoolRef:
description: IPv4PoolRef is a reference to an IPAM Pool
resource, which exposes IPv4 addresses. The network device
will use an available IP address from the referenced pool.
This can be combined with `IPv6PoolRef` in order to enable
dual stack.
properties:
apiGroup:
description: APIGroup is the group for the resource
being referenced. If APIGroup is not specified, the
specified Kind must be in the core API group. For
any other third-party types, APIGroup is required.
type: string
kind:
description: Kind is the type of resource being referenced
type: string
name:
description: Name is the name of resource being referenced
type: string
required:
- kind
- name
type: object
x-kubernetes-map-type: atomic
x-kubernetes-validations:
- message: ipv4PoolRef allows only IPAM apiGroup ipam.cluster.x-k8s.io
rule: self.apiGroup == 'ipam.cluster.x-k8s.io'
- message: ipv4PoolRef allows either InClusterIPPool or
GlobalInClusterIPPool
rule: self.kind == 'InClusterIPPool' || self.kind == 'GlobalInClusterIPPool'
ipv6PoolRef:
description: IPv6PoolRef is a reference to an IPAM pool
resource, which exposes IPv6 addresses. The network device
will use an available IP address from the referenced pool.
this can be combined with `IPv4PoolRef` in order to enable
dual stack.
properties:
apiGroup:
description: APIGroup is the group for the resource
being referenced. If APIGroup is not specified, the
specified Kind must be in the core API group. For
any other third-party types, APIGroup is required.
type: string
kind:
description: Kind is the type of resource being referenced
type: string
name:
description: Name is the name of resource being referenced
type: string
required:
- kind
- name
type: object
x-kubernetes-map-type: atomic
x-kubernetes-validations:
- message: ipv6PoolRef allows only IPAM apiGroup ipam.cluster.x-k8s.io
rule: self.apiGroup == 'ipam.cluster.x-k8s.io'
- message: ipv6PoolRef allows either InClusterIPPool or
GlobalInClusterIPPool
rule: self.kind == 'InClusterIPPool' || self.kind == 'GlobalInClusterIPPool'
name:
description: Name is the virtual network device name. must
be unique within the virtual machine.
minLength: 3
type: string
routes:
description: Routes are the routes associated with the l3mdev
policy.
items:
description: RouteSpec describes an IPv4/IPv6 Route.
properties:
metric:
description: Metric is the priority of the route in
the routing table.
format: int32
type: integer
table:
description: Table is the routing table used for this
route.
format: int32
type: integer
to:
description: To is the subnet to be routed.
type: string
via:
description: Via is the gateway to the subnet.
type: string
type: object
minItems: 1
type: array
routingPolicy:
description: RoutingPolicy is the l3mdev policy inserted
into FiB.
items:
description: RoutingPolicySpec is a linux FIB rule.
properties:
from:
description: From is the subnet of the source.
type: string
priority:
description: Priority is the position in the ip rule
fib table.
format: int32
maximum: 4294967295
type: integer
x-kubernetes-validations:
- message: Cowardly refusing to insert fib rule matching
kernel rules
rule: (self > 0 && self < 32765) || (self > 32766)
table:
description: Table is the routing table id.
format: int32
type: integer
to:
description: To is the subnet of the target.
type: string
type: object
minItems: 1
type: array
table:
description: Table is the ID of the routing table used for
the l3mdev vrf device.
format: int32
maximum: 4294967295
type: integer
x-kubernetes-validations:
- message: Cowardly refusing to insert l3mdev rules into
kernel tables
rule: (self > 0 && self < 254) || (self > 255)
required:
- name
- table
type: object
type: array
x-kubernetes-list-map-keys:
- name
x-kubernetes-list-type: map
type: object
numCores:
description: NumCores is the number of cores per CPU socket in a virtual
@@ -440,7 +596,7 @@ spec:
type: object
network:
description: Network returns the network status for each of the machine's
configured network interfaces.
configured. network interfaces.
items:
description: NetworkStatus provides information about one of a VM's
networks.
@@ -467,14 +623,14 @@ spec:
type: array
proxmoxNode:
description: ProxmoxNode is the name of the proxmox node, which was
chosen for this machine to be deployed on
chosen for this machine to be deployed on.
type: string
ready:
description: Ready indicates the Docker infrastructure has been provisioned
and is ready
and is ready.
type: boolean
retryAfter:
description: RetryAfter tracks the time we can retry queueing a task
description: RetryAfter tracks the time we can retry queueing a task.
format: date-time
type: string
taskRef:

View File

@@ -282,6 +282,177 @@ spec:
required:
- bridge
type: object
vrfs:
description: Definition of a Vrf Device.
items:
description: VRFDevice defines Virtual Routing Flow
devices.
properties:
dnsServers:
description: DNSServers contains information about
nameservers to be used for this interface. If
this field is not set, it will use the default
dns servers from the ProxmoxCluster.
items:
type: string
minItems: 1
type: array
interfaces:
description: Interfaces is the list of proxmox network
devices managed by this virtual device.
items:
type: string
type: array
ipv4PoolRef:
description: IPv4PoolRef is a reference to an IPAM
Pool resource, which exposes IPv4 addresses. The
network device will use an available IP address
from the referenced pool. This can be combined
with `IPv6PoolRef` in order to enable dual stack.
properties:
apiGroup:
description: APIGroup is the group for the resource
being referenced. If APIGroup is not specified,
the specified Kind must be in the core API
group. For any other third-party types, APIGroup
is required.
type: string
kind:
description: Kind is the type of resource being
referenced
type: string
name:
description: Name is the name of resource being
referenced
type: string
required:
- kind
- name
type: object
x-kubernetes-map-type: atomic
x-kubernetes-validations:
- message: ipv4PoolRef allows only IPAM apiGroup
ipam.cluster.x-k8s.io
rule: self.apiGroup == 'ipam.cluster.x-k8s.io'
- message: ipv4PoolRef allows either InClusterIPPool
or GlobalInClusterIPPool
rule: self.kind == 'InClusterIPPool' || self.kind
== 'GlobalInClusterIPPool'
ipv6PoolRef:
description: IPv6PoolRef is a reference to an IPAM
pool resource, which exposes IPv6 addresses. The
network device will use an available IP address
from the referenced pool. this can be combined
with `IPv4PoolRef` in order to enable dual stack.
properties:
apiGroup:
description: APIGroup is the group for the resource
being referenced. If APIGroup is not specified,
the specified Kind must be in the core API
group. For any other third-party types, APIGroup
is required.
type: string
kind:
description: Kind is the type of resource being
referenced
type: string
name:
description: Name is the name of resource being
referenced
type: string
required:
- kind
- name
type: object
x-kubernetes-map-type: atomic
x-kubernetes-validations:
- message: ipv6PoolRef allows only IPAM apiGroup
ipam.cluster.x-k8s.io
rule: self.apiGroup == 'ipam.cluster.x-k8s.io'
- message: ipv6PoolRef allows either InClusterIPPool
or GlobalInClusterIPPool
rule: self.kind == 'InClusterIPPool' || self.kind
== 'GlobalInClusterIPPool'
name:
description: Name is the virtual network device
name. must be unique within the virtual machine.
minLength: 3
type: string
routes:
description: Routes are the routes associated with
the l3mdev policy.
items:
description: RouteSpec describes an IPv4/IPv6
Route.
properties:
metric:
description: Metric is the priority of the
route in the routing table.
format: int32
type: integer
table:
description: Table is the routing table used
for this route.
format: int32
type: integer
to:
description: To is the subnet to be routed.
type: string
via:
description: Via is the gateway to the subnet.
type: string
type: object
minItems: 1
type: array
routingPolicy:
description: RoutingPolicy is the l3mdev policy
inserted into FiB.
items:
description: RoutingPolicySpec is a linux FIB
rule.
properties:
from:
description: From is the subnet of the source.
type: string
priority:
description: Priority is the position in the
ip rule fib table.
format: int32
maximum: 4294967295
type: integer
x-kubernetes-validations:
- message: Cowardly refusing to insert fib
rule matching kernel rules
rule: (self > 0 && self < 32765) || (self
> 32766)
table:
description: Table is the routing table id.
format: int32
type: integer
to:
description: To is the subnet of the target.
type: string
type: object
minItems: 1
type: array
table:
description: Table is the ID of the routing table
used for the l3mdev vrf device.
format: int32
maximum: 4294967295
type: integer
x-kubernetes-validations:
- message: Cowardly refusing to insert l3mdev rules
into kernel tables
rule: (self > 0 && self < 254) || (self > 255)
required:
- name
- table
type: object
type: array
x-kubernetes-list-map-keys:
- name
x-kubernetes-list-type: map
type: object
numCores:
description: NumCores is the number of cores per CPU socket

View File

@@ -95,14 +95,14 @@ IP_PREFIX: "25" # Subnet Mask in C
DNS_SERVERS: "[8.8.8.8,8.8.4.4]" # The dns nameservers for the machines network-config.
BRIDGE: "vmbr1" # The network bridge device for Proxmox VE VMs
## -- xl nodes-- ##
BOOT_VOLUME_DEVICE: "scsi0" # The device used for the boot disk.
BOOT_VOLUME_SIZE: "100" # The size of the boot disk in GB.
NUM_SOCKETS: "2" # The number of sockets for the VMs.
NUM_CORES: "4" # The number of cores for the VMs.
MEMORY_MIB: "8048" # The memory size for the VMs.
## -- xl nodes -- ##
BOOT_VOLUME_DEVICE: "scsi0" # The device used for the boot disk.
BOOT_VOLUME_SIZE: "100" # The size of the boot disk in GB.
NUM_SOCKETS: "2" # The number of sockets for the VMs.
NUM_CORES: "4" # The number of cores for the VMs.
MEMORY_MIB: "8048" # The memory size for the VMs.
EXP_CLUSTER_RESOURCE_SET: "true" # This enables the ClusterResourceSet feature that we are using to deploy CNI
EXP_CLUSTER_RESOURCE_SET: "true" # This enables the ClusterResourceSet feature that we are using to deploy CNI
```
the `CONTROL_PLANE_ENDPOINT_IP` is an IP that must be on the same subnet as the control plane machines
@@ -161,12 +161,13 @@ For templates using `CNI`s you're required to create `ConfigMaps` to make `Clust
We provide the following templates:
| Flavor | Tepmlate File | CRS File |
|----------------| ----------------------------------------------- |-------------------------------|
| cilium | templates/cluster-template-cilium.yaml | templates/crs/cni/cilium.yaml |
| calico | templates/cluster-template-calico.yaml | templates/crs/cni/calico.yaml |
| multiple-vlans | templates/cluster-template-multiple-vlans.yaml | - |
| default | templates/cluster-template.yaml | - |
| Flavor | Template File | CRS File |
|---------------------| -----------------------------------------------------|-----------------------------------------------------------|
| cilium | templates/cluster-template-cilium.yaml | templates/crs/cni/cilium.yaml |
| calico | templates/cluster-template-calico.yaml | templates/crs/cni/calico.yaml |
| multiple-vlans | templates/cluster-template-multiple-vlans.yaml | - |
| default | templates/cluster-template.yaml | - |
| cilium loadbalancer | templates/cluster-template-cilium-load-balancer.yaml | templates/crs/cni/cilium.yaml, templates/crs/metallb.yaml |
For more information about advanced clusters please check our [advanced setups docs](advanced-setups.md).
@@ -184,7 +185,7 @@ Now install the ConfigMap into your k8s:
kubectl create cm cilium --from-file=data=templates/crs/cni/cilium.yaml
```
Now, you can create a cluster using the cilium flavor:
Now, you can create a cluster using the cilium flavor:
```bash
$ clusterctl generate cluster proxmox-cilium \

View File

@@ -71,6 +71,62 @@ clusterctl generate cluster test-duacl-stack \
--flavor=dual-stack > cluster.yaml
```
## Cluster with LoadBalancer nodes
The template for LoadBalancers is for [dual stack](#dual-stack) with [multiple nics](#multiple-nics). All
environment variables regarding those need to be set. You may want to reduce the template to your use case.
The idea is that there are special nodes for load balancing. These have an extra network card which is supposed
to be connected to the BGP receiving switches. All services exposed with the type "LoadBalancer" will take an
IP from `METALLB_IPV4_RANGE` or `METALLB_IPV6_RANGE` which will be announced to the BGP peers.
The template presupposes two bgp peers per address family (ipv4,ipv6) because this is a high availability setup.
For the routing to work, we employ source ip based routing. This does not work (reliably) without source IPs.
For this reason, all nodes are created with `ipvs` in kube-proxy. This also necessitates setting `strictARP`,
as otherwise packets may still take wrong paths and cause reverse path filter issues.
If you require changing `METALLB_IPV{4,6}_RANGE` after a cluster has been deployed, you need to redeploy load balancer
nodes, as these variables are also used in bootstrap to establish source ip based routing.
LoadBalancer nodes are tainted and only run pods required for load balancing.
```
## -- loadbalancer nodes -- #
LOAD_BALANCER_MACHINE_COUNT: 2 # Number of load balancer nodes
EXT_SERVICE_BRIDGE: "vmbr2" # The network bridge device used for load balancing and bgp.
LB_BGP_IPV4_RANGES: "[172.16.4.10-172.16.4.20]" # The IP ranges used by the cluster for establishing the bgp session.
LB_BGP_IPV6_RANGES:
LB_BGP_IPV4_PREFIX: "24" # Subnet Mask in CIDR notation for your bgp IP ranges.
LB_BGP_IPV6_PREFIX:
METALLB_IPV4_ASN: "65400" # The nodes bgp asn.
METALLB_IPV6_ASN:
METALLB_IPV4_BGP_PEER: "172.16.4.1" # The nodes bgp peer IP address.
METALLB_IPV4_BGP_PEER2: "172.16.4.2" # Backup bgp peer for H/A
METALLB_IPV6_BGP_PEER:
METALLB_IPV6_BGP_PEER2:
METALLB_IPV4_BGP_SECRET: "REDACTED" # The secret required to establish a bgp session (if any).
METALLB_IPV6_BGP_SECRET:
METALLB_IPV4_BGP_PEER_ASN: "65500" # The bgp peer's asn.
METALLB_IPV4_BGP_PEER2_ASN: # Backup bgp peer's asn
METALLB_IPV6_BGP_PEER_ASN:
METALLB_IPV6_BGP_PEER2_ASN:
METALLB_IPV4_RANGE: 7.6.5.0/24 # The IP Range MetalLB uses to announce your services.
METALLB_IPV6_RANGE:
```
#### Generate a Cluster
```bash
clusterctl generate cluster test-bgp-lb \
--infrastructure proxmox \
--kubernetes-version v1.28.3 \
--control-plane-machine-count=1 \
--worker-machine-count=2 \
--flavor=cilium-load-balancer > cluster.yaml
```
#### Node over-/ underprovisioning
By default our scheduler only allows allocating as much memory to guests as the host has. This might not be a desirable behaviour in all cases. For example, one might explicitly want to overprovision their host's memory, or to reserve a bit of the host's memory for itself.

View File

@@ -33,3 +33,25 @@ export SECONDARY_BRIDGE=vmbr1
export NODE_IPV6_RANGES="[2001:db8:1::1-2001:db8:1::10]"
export IPV6_PREFIX=64
export IPV6_GATEWAY="2001:db8:1::1"
# LoadBalancers
# Bridge and address ranges of the extra NIC used for load balancing and BGP.
export EXT_SERVICE_BRIDGE=vmbr2
export LB_BGP_IPV4_RANGES="[172.16.4.10-172.16.4.20]"
export LB_BGP_IPV4_PREFIX=24
# Named LB_BGP_IPV6_RANGES (plural) to match the documented variable and its IPv4 sibling.
export LB_BGP_IPV6_RANGES="[2001:db8:1::10-2001:db8:1::ffff]"
export LB_BGP_IPV6_PREFIX=64
# MetalLB BGP settings for IPv4.
# NOTE(review): the documentation example uses peers 172.16.4.1/172.16.4.2 (inside the LB_BGP range); confirm these peer addresses.
export METALLB_IPV4_ASN=65400
export METALLB_IPV4_BGP_PEER=172.24.16.1
export METALLB_IPV4_BGP_PEER2=172.24.16.2
export METALLB_IPV4_BGP_SECRET=REDACTED
export METALLB_IPV4_BGP_PEER_ASN=65500
export METALLB_IPV4_BGP_PEER2_ASN=65500
export METALLB_IPV4_RANGE=7.6.5.0/24
# MetalLB BGP settings for IPv6.
export METALLB_IPV6_ASN=65400
export METALLB_IPV6_BGP_PEER=2001:db8:1::1
export METALLB_IPV6_BGP_PEER2=2001:db8:1::2
export METALLB_IPV6_BGP_SECRET=REDACTED
export METALLB_IPV6_BGP_PEER_ASN=65500
export METALLB_IPV6_BGP_PEER2_ASN=65500
export METALLB_IPV6_RANGE=2001:db8:2::0/64

View File

@@ -124,7 +124,7 @@ func getBootstrapData(ctx context.Context, scope *scope.MachineScope) ([]byte, e
func getNetworkConfigData(ctx context.Context, machineScope *scope.MachineScope) ([]cloudinit.NetworkConfigData, error) {
// provide a default in case network is not defined
network := ptr.Deref(machineScope.ProxmoxMachine.Spec.Network, infrav1alpha1.NetworkSpec{})
networkConfigData := make([]cloudinit.NetworkConfigData, 0, 1+len(network.AdditionalDevices))
networkConfigData := make([]cloudinit.NetworkConfigData, 0, 1+len(network.AdditionalDevices)+len(network.VRFs))
defaultConfig, err := getDefaultNetworkDevice(ctx, machineScope)
if err != nil {
@@ -138,9 +138,43 @@ func getNetworkConfigData(ctx context.Context, machineScope *scope.MachineScope)
}
networkConfigData = append(networkConfigData, additionalConfig...)
virtualConfig, err := getVirtualNetworkDevices(ctx, machineScope, network, networkConfigData)
if err != nil {
return nil, err
}
networkConfigData = append(networkConfigData, virtualConfig...)
return networkConfigData, nil
}
// getRoutingData converts the API route specs into their cloud-init
// representation, preserving order. The result always points to a non-nil
// slice, even when routes is empty.
func getRoutingData(routes []infrav1alpha1.RouteSpec) *[]cloudinit.RoutingData {
	converted := make([]cloudinit.RoutingData, len(routes))
	for idx := range routes {
		src := &routes[idx]
		dst := &converted[idx]
		dst.To = src.To
		dst.Via = src.Via
		dst.Metric = src.Metric
		dst.Table = src.Table
	}
	return &converted
}
// getRoutingPolicyData converts the API routing policy specs into cloud-init
// FIB rules, preserving order. The result always points to a non-nil slice,
// even when rules is empty.
func getRoutingPolicyData(rules []infrav1alpha1.RoutingPolicySpec) *[]cloudinit.FIBRuleData {
	converted := make([]cloudinit.FIBRuleData, len(rules))
	for idx := range rules {
		src := &rules[idx]
		dst := &converted[idx]
		dst.To = src.To
		dst.From = src.From
		dst.Priority = src.Priority
		dst.Table = src.Table
	}
	return &converted
}
func getNetworkConfigDataForDevice(ctx context.Context, machineScope *scope.MachineScope, device string) (*cloudinit.NetworkConfigData, error) {
nets := machineScope.VirtualMachine.VirtualMachineConfig.MergeNets()
// For nics supporting multiple IP addresses, we need to cut the '-inet' or '-inet6' part,
@@ -207,13 +241,44 @@ func getDefaultNetworkDevice(ctx context.Context, machineScope *scope.MachineSco
config.Gateway6 = conf.Gateway6
}
}
config.Name = "eth0"
config.Type = "ethernet"
config.ProxName = "net0"
return []cloudinit.NetworkConfigData{config}, nil
}
// getVirtualNetworkDevices builds the cloud-init configuration for every VRF
// device declared in network.
//
// data holds the already-resolved network devices. Each entry of a VRF's
// Interfaces list is matched against both the in-guest name (Name) and the
// Proxmox device name (ProxName) of those devices, and the in-guest name of
// every match is attached to the VRF.
//
// An error is returned as soon as a listed interface matches no device in
// data. The context and machine scope parameters are currently unused.
func getVirtualNetworkDevices(_ context.Context, _ *scope.MachineScope, network infrav1alpha1.NetworkSpec, data []cloudinit.NetworkConfigData) ([]cloudinit.NetworkConfigData, error) {
	networkConfigData := make([]cloudinit.NetworkConfigData, 0, len(network.VRFs))

	for _, device := range network.VRFs {
		config := cloudinit.NetworkConfigData{
			Type:  "vrf",
			Name:  device.Name,
			Table: device.Table,
		}

		for _, child := range device.Interfaces {
			// Track the match per child. Comparing the cumulative number of
			// attached interfaces with the loop index would miss an unknown
			// child whenever an earlier child matched more than one device.
			found := false
			for _, net := range data {
				if net.Name == child || net.ProxName == child {
					config.Interfaces = append(config.Interfaces, net.Name)
					found = true
				}
			}
			if !found {
				return nil, errors.Errorf("unable to find vrf interface=%s child interface %s", config.Name, child)
			}
		}

		config.Routes = *getRoutingData(device.Routes)
		config.FIBRules = *getRoutingPolicyData(device.RoutingPolicy)
		networkConfigData = append(networkConfigData, config)
	}
	return networkConfigData, nil
}
func getAdditionalNetworkDevices(ctx context.Context, machineScope *scope.MachineScope, network infrav1alpha1.NetworkSpec) ([]cloudinit.NetworkConfigData, error) {
networkConfigData := make([]cloudinit.NetworkConfigData, 0, len(network.AdditionalDevices))
// additional network devices append after the provisioning interface
var index = 1
// additional network devices.
for _, nic := range network.AdditionalDevices {
var config = ptr.To(cloudinit.NetworkConfigData{})
@@ -252,6 +317,11 @@ func getAdditionalNetworkDevices(ctx context.Context, machineScope *scope.Machin
}
}
config.Name = fmt.Sprintf("eth%d", index)
index++
config.Type = "ethernet"
config.ProxName = nic.Name
if len(config.MacAddress) > 0 {
networkConfigData = append(networkConfigData, *config)
}

View File

@@ -135,6 +135,48 @@ func TestGetNetworkConfigDataForDevice_MissingMACAddress(t *testing.T) {
require.Nil(t, cfg)
}
// TestGetRoutingDataMock verifies that getRoutingData converts every route, in
// order and field by field, into its cloud-init representation.
func TestGetRoutingDataMock(t *testing.T) {
	routes := *getRoutingData([]infrav1alpha1.RouteSpec{
		{To: "default", Via: "192.168.178.1"},
		{To: "172.24.16.0/24", Via: "192.168.178.1", Table: 100},
	})

	// Check the full result instead of a single field so that a dropped or
	// mis-copied field is actually caught.
	require.Len(t, routes, 2)
	require.Equal(t, cloudinit.RoutingData{To: "default", Via: "192.168.178.1"}, routes[0])
	require.Equal(t, cloudinit.RoutingData{To: "172.24.16.0/24", Via: "192.168.178.1", Table: 100}, routes[1])
}
// TestGetRoutingpolicyDataMock verifies that getRoutingPolicyData converts
// every routing policy, in order and field by field, into a cloud-init FIB rule.
func TestGetRoutingpolicyDataMock(t *testing.T) {
	rules := *getRoutingPolicyData([]infrav1alpha1.RoutingPolicySpec{
		{To: "10.10.10.0/24", Table: 100},
		{From: "172.24.16.0/24", Table: 100},
	})

	// Check the full result instead of a single field so that a dropped or
	// mis-copied field is actually caught.
	require.Len(t, rules, 2)
	require.Equal(t, cloudinit.FIBRuleData{To: "10.10.10.0/24", Table: 100}, rules[0])
	require.Equal(t, cloudinit.FIBRuleData{From: "172.24.16.0/24", Table: 100}, rules[1])
}
// TestGetVirtualNetworkDevices_VRFDevice_MissingInterface checks that a VRF
// referencing an interface absent from the known devices yields an error and
// no configuration.
func TestGetVirtualNetworkDevices_VRFDevice_MissingInterface(t *testing.T) {
	machineScope, _, _ := setupReconcilerTest(t)
	machineScope.SetVirtualMachine(newStoppedVM())

	// The VRF enslaves "net1" ...
	vrf := infrav1alpha1.VRFDevice{
		Name:       "vrf-blue",
		Table:      500,
		Interfaces: []string{"net1"},
	}
	spec := infrav1alpha1.NetworkSpec{
		VirtualNetworkDevices: infrav1alpha1.VirtualNetworkDevices{
			VRFs: []infrav1alpha1.VRFDevice{vrf},
		},
	}

	// ... while the only known device is an empty one that matches nothing.
	known := []cloudinit.NetworkConfigData{{}}

	devices, err := getVirtualNetworkDevices(context.Background(), machineScope, spec, known)
	require.Error(t, err)
	require.Nil(t, devices)
}
func TestReconcileBootstrapData_DualStack(t *testing.T) {
machineScope, _, kubeClient := setupReconcilerTest(t)
machineScope.InfraCluster.ProxmoxCluster.Spec.IPv6Config = &v1alpha2.InClusterIPPoolSpec{
@@ -213,6 +255,50 @@ func TestReconcileBootstrapData_DualStack_AdditionalDevices(t *testing.T) {
require.True(t, *machineScope.ProxmoxMachine.Status.BootstrapDataProvided)
}
// TestReconcileBootstrapData_VirtualDevices_VRF reconciles bootstrap data for
// a machine with one additional NIC ("net1") enslaved to a VRF and expects the
// bootstrap data to be provided without a requeue.
func TestReconcileBootstrapData_VirtualDevices_VRF(t *testing.T) {
	machineScope, _, kubeClient := setupReconcilerTest(t)

	// Secondary NIC that the VRF below refers to by its Proxmox name.
	secondaryNIC := infrav1alpha1.AdditionalNetworkDevice{
		NetworkDevice: infrav1alpha1.NetworkDevice{Bridge: "vmbr1", Model: ptr.To("virtio")},
		Name:          "net1",
		DNSServers:    []string{"1.2.3.4"},
		IPv4PoolRef: &corev1.TypedLocalObjectReference{
			APIGroup: ptr.To("ipam.cluster.x-k8s.io"),
			Kind:     "InClusterIPPool",
			Name:     "sample",
		},
	}
	blueVRF := infrav1alpha1.VRFDevice{
		Interfaces: []string{"net1"},
		Name:       "vrf-blue",
		Table:      500,
	}
	machineScope.ProxmoxMachine.Spec.Network = &infrav1alpha1.NetworkSpec{
		VirtualNetworkDevices: infrav1alpha1.VirtualNetworkDevices{
			VRFs: []infrav1alpha1.VRFDevice{blueVRF},
		},
		AdditionalDevices: []infrav1alpha1.AdditionalNetworkDevice{secondaryNIC},
	}

	// VM with two NICs: the default device on vmbr0 and net1 on vmbr1.
	vm := newVMWithNets("virtio=A6:23:64:4D:84:CB,bridge=vmbr0", "virtio=AA:23:64:4D:84:CD,bridge=vmbr1")
	vm.VirtualMachineConfig.SMBios1 = biosUUID
	machineScope.SetVirtualMachine(vm)

	// Addresses recorded in the machine status and backed by IPAM resources.
	machineScope.ProxmoxMachine.Status.IPAddresses = map[string]infrav1alpha1.IPAddress{
		infrav1alpha1.DefaultNetworkDevice: {IPV4: "10.10.10.10"},
		"net1":                             {IPV4: "10.100.10.10"},
	}
	createIP4AddressResource(t, kubeClient, machineScope, infrav1alpha1.DefaultNetworkDevice, "10.10.10.10")
	createIP4AddressResource(t, kubeClient, machineScope, "net1", "10.100.10.10")
	createBootstrapSecret(t, kubeClient, machineScope)

	// Swap in the fake ISO injector for this test and restore the default afterwards.
	getISOInjector = func(_ *proxmox.VirtualMachine, _ []byte, _, _ cloudinit.Renderer) isoInjector {
		return FakeISOInjector{}
	}
	t.Cleanup(func() { getISOInjector = defaultISOInjector })

	requeue, err := reconcileBootstrapData(context.Background(), machineScope)
	require.NoError(t, err)
	require.False(t, requeue)

	provisionedSet := conditions.Has(machineScope.ProxmoxMachine, infrav1alpha1.VMProvisionedCondition)
	require.False(t, provisionedSet)
	require.True(t, *machineScope.ProxmoxMachine.Status.BootstrapDataProvided)
}
func TestVMHasMacAddress(t *testing.T) {
machineScope := &scope.MachineScope{VirtualMachine: newRunningVM()}
require.False(t, vmHasMacAddresses(machineScope))

View File

@@ -42,4 +42,10 @@ var (
// ErrMissingIPAddresses returns an error if required ip addresses is empty.
ErrMissingIPAddresses = errors.New("ip addresses is not set")
// ErrMalformedRoute is returned if a route can not be assembled by netplan.
ErrMalformedRoute = errors.New("route is malformed")
// ErrMalformedFIBRule is returned if a fib rule can not be assembled by netplan.
ErrMalformedFIBRule = errors.New("routing policy is malformed")
)

View File

@@ -27,7 +27,9 @@ const (
renderer: networkd
ethernets:
{{- range $index, $element := .NetworkConfigData }}
eth{{ $index }}:
{{- $type := $element.Type }}
{{- if eq $type "ethernet" }}
{{ $element.Name }}:
match:
macaddress: {{ $element.MacAddress }}
dhcp4: {{ if $element.DHCP4 }}true{{ else }}false{{ end }}
@@ -57,7 +59,48 @@ const (
- '{{ . }}'
{{- end -}}
{{- end -}}
{{- end -}}`
{{- end -}}
{{- end -}}
{{- /* Render every device of type "vrf" under a single "vrfs:" key. $vrf records whether the key has been written already; it must be updated with "=" because ":=" would declare a new variable that only lives inside the enclosing if-block, leaving the outer $vrf at 0 and repeating "vrfs:" for each VRF. */ -}}
{{- $vrf := 0 -}}
{{- range $index, $element := .NetworkConfigData }}
{{- if eq $element.Type "vrf" }}
{{- if eq $vrf 0 }}
vrfs:
{{- $vrf = 1 }}
{{- end }}
{{$element.Name}}:
table: {{ $element.Table }}
{{- if $element.Routes }}{{ template "routes" $element }}{{- end -}}
{{- if $element.FIBRules }}{{ template "rules" $element }}{{- end -}}
{{- if $element.Interfaces }}
interfaces:
{{- range $element.Interfaces }}
- {{ . }}
{{- end -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{- /* "rules" renders the FIBRules of the device passed as dot into a "routing-policy:" list with one flow mapping per rule. The to, from, priority and table keys are only written when the corresponding field is non-empty or non-zero, and each written key is followed by a comma. */ -}}
{{- define "rules" }}
routing-policy:
{{- range $index, $rule := .FIBRules }}
- {
{{- if $rule.To }} "to": "{{$rule.To}}", {{ end -}}
{{- if $rule.From }} "from": "{{$rule.From}}", {{ end -}}
{{- if $rule.Priority }} "priority": {{$rule.Priority}}, {{ end -}}
{{- if $rule.Table }} "table": {{$rule.Table}}, {{ end -}} }
{{- end }}
{{- end -}}
{{- /* "routes" renders the Routes of the device passed as dot into a "routes:" list with one flow mapping per route. The to, via, metric and table keys are only written when the corresponding field is non-empty or non-zero, and each written key is followed by a comma. */ -}}
{{- define "routes" }}
routes:
{{- range $index, $route := .Routes }}
- {
{{- if $route.To }} "to": "{{$route.To}}", {{ end -}}
{{- if $route.Via }} "via": "{{$route.Via}}", {{ end -}}
{{- if $route.Metric }} "metric": {{$route.Metric}}, {{ end -}}
{{- if $route.Table }} "table": {{$route.Table}}, {{ end -}} }
{{- end }}
{{- end -}}
`
)
// NetworkConfig provides functionality to render machine network-config.
@@ -89,9 +132,23 @@ func (r *NetworkConfig) validate() error {
return ErrMissingNetworkConfigData
}
for _, d := range r.data.NetworkConfigData {
// TODO: refactor this when network configuration is unified
if d.Type != "ethernet" {
err := validRoutes(d.Routes)
if err != nil {
return err
}
err = validFIBRules(d.FIBRules, true)
if err != nil {
return err
}
continue
}
if !d.DHCP4 && !d.DHCP6 && len(d.IPAddress) == 0 && len(d.IPV6Address) == 0 {
return ErrMissingIPAddress
}
if d.MacAddress == "" {
return ErrMissingMacAddress
}
@@ -119,6 +176,58 @@ func (r *NetworkConfig) validate() error {
return nil
}
// validRoutes checks that every route can be expressed in netplan.
//
// A destination has to be the literal "default", an IP address (implicitly the
// smallest possible subnet) or an IP prefix. A gateway is optional, but when
// present it has to be a plain IP address. Special route types such as
// blackhole are not supported. Returns ErrMalformedRoute for the first route
// that violates these rules, nil otherwise.
func validRoutes(input []RoutingData) error {
	// addrOrPrefix reports whether s parses as an IP prefix or an IP address.
	addrOrPrefix := func(s string) bool {
		if _, err := netip.ParsePrefix(s); err == nil {
			return true
		}
		_, err := netip.ParseAddr(s)
		return err == nil
	}

	for i := range input {
		destination, gateway := input[i].To, input[i].Via

		if destination != "default" && !addrOrPrefix(destination) {
			return ErrMalformedRoute
		}

		if gateway == "" {
			continue
		}
		if _, err := netip.ParseAddr(gateway); err != nil {
			return ErrMalformedRoute
		}
	}

	return nil
}
// validFIBRules checks that every routing policy rule can be expressed in
// netplan.
//
// Only To/From selectors are supported, so at least one of them has to be set,
// and each one that is set has to be an IP address or an IP prefix. A rule
// also needs a routing table unless isVrf is true. Returns ErrMalformedFIBRule
// for the first rule that violates these rules, nil otherwise.
func validFIBRules(input []FIBRuleData, isVrf bool) error {
	// addrOrPrefix reports whether s parses as an IP prefix or an IP address.
	addrOrPrefix := func(s string) bool {
		if _, err := netip.ParsePrefix(s); err == nil {
			return true
		}
		_, err := netip.ParseAddr(s)
		return err == nil
	}

	for i := range input {
		rule := input[i]

		hasSelector := rule.To != "" || rule.From != ""
		hasTable := rule.Table != 0 || isVrf
		if !hasSelector || !hasTable {
			return ErrMalformedFIBRule
		}

		for _, selector := range [...]string{rule.To, rule.From} {
			if selector != "" && !addrOrPrefix(selector) {
				return ErrMalformedFIBRule
			}
		}
	}

	return nil
}
func validIPAddress(input string) error {
if input == "" {
return ErrMissingIPAddress

View File

@@ -192,6 +192,60 @@ const (
addresses:
- '8.8.8.8'
- '8.8.4.4'`
expectedValidNetworkConfigMultipleNicsVRF = `network:
version: 2
renderer: networkd
ethernets:
eth0:
match:
macaddress: 92:60:a0:5b:22:c2
dhcp4: false
dhcp6: false
addresses:
- 10.10.10.12/24
routes:
- to: 0.0.0.0/0
via: 10.10.10.1
nameservers:
addresses:
- '8.8.8.8'
- '8.8.4.4'
eth1:
match:
macaddress: b4:87:18:bf:a3:60
dhcp4: false
dhcp6: false
addresses:
- 196.168.100.124/24
routes:
- to: 0.0.0.0/0
via: 196.168.100.254
nameservers:
addresses:
- '8.8.8.8'
- '8.8.4.4'
vrfs:
vrf-blue:
table: 500
routes:
- { "to": "default", "via": "192.168.178.1", "metric": 100, "table": 100, }
- { "to": "10.10.10.0/24", "via": "192.168.178.254", "metric": 100, }
routing-policy:
- { "to": "0.0.0.0/0", "from": "192.168.178.1/24", "priority": 999, "table": 100, }
interfaces:
- eth0
- eth1`
expectedValidNetworkConfigValidFIBRule = `network:
version: 2
renderer: networkd
ethernets:
vrfs:
vrf-blue:
table: 500
routing-policy:
- { "from": "10.10.0.0/16", }`
)
func TestNetworkConfig_Render(t *testing.T) {
@@ -214,6 +268,8 @@ func TestNetworkConfig_Render(t *testing.T) {
args: args{
nics: []NetworkConfigData{
{
Type: "ethernet",
Name: "eth0",
MacAddress: "92:60:a0:5b:22:c2",
IPAddress: "10.10.10.12/24",
Gateway: "10.10.10.1",
@@ -231,6 +287,8 @@ func TestNetworkConfig_Render(t *testing.T) {
args: args{
nics: []NetworkConfigData{
{
Type: "ethernet",
Name: "eth0",
MacAddress: "92:60:a0:5b:22:c2",
DHCP6: true,
IPAddress: "10.10.10.12/24",
@@ -249,6 +307,8 @@ func TestNetworkConfig_Render(t *testing.T) {
args: args{
nics: []NetworkConfigData{
{
Type: "ethernet",
Name: "eth0",
MacAddress: "92:60:a0:5b:22:c2",
Gateway: "10.10.10.1",
DNSServers: []string{"8.8.8.8", "8.8.4.4"},
@@ -265,6 +325,8 @@ func TestNetworkConfig_Render(t *testing.T) {
args: args{
nics: []NetworkConfigData{
{
Type: "ethernet",
Name: "eth0",
MacAddress: "92:60:a0:5b:22:c2",
IPAddress: "10.10.10.12",
Gateway: "10.10.10.1",
@@ -277,11 +339,32 @@ func TestNetworkConfig_Render(t *testing.T) {
err: ErrMalformedIPAddress,
},
},
"InvalidNetworkConfigMalformedIP": {
reason: "ip address malformed",
args: args{
nics: []NetworkConfigData{
{
Type: "ethernet",
Name: "eth0",
MacAddress: "92:60:a0:5b:22:c2",
IPAddress: "10.10.10.115",
Gateway: "10.10.10.1",
DNSServers: []string{"8.8.8.8", "8.8.4.4"},
},
},
},
want: want{
network: "",
err: ErrMalformedIPAddress,
},
},
"InvalidNetworkConfigGW": {
reason: "gw is not set",
args: args{
nics: []NetworkConfigData{
{
Type: "ethernet",
Name: "eth0",
MacAddress: "92:60:a0:5b:22:c2",
IPAddress: "10.10.10.12/24",
DNSServers: []string{"8.8.8.8", "8.8.4.4"},
@@ -298,6 +381,8 @@ func TestNetworkConfig_Render(t *testing.T) {
args: args{
nics: []NetworkConfigData{
{
Type: "ethernet",
Name: "eth0",
IPAddress: "10.10.10.11/24",
Gateway: "10.10.10.1",
DNSServers: []string{"8.8.8.8", "8.8.4.4"},
@@ -314,6 +399,8 @@ func TestNetworkConfig_Render(t *testing.T) {
args: args{
nics: []NetworkConfigData{
{
Type: "ethernet",
Name: "eth0",
MacAddress: "92:60:a0:5b:22:c2",
IPAddress: "10.10.10.12/24",
Gateway: "10.10.10.1",
@@ -330,12 +417,16 @@ func TestNetworkConfig_Render(t *testing.T) {
args: args{
nics: []NetworkConfigData{
{
Type: "ethernet",
Name: "eth0",
MacAddress: "92:60:a0:5b:22:c2",
IPAddress: "10.10.10.12/24",
Gateway: "10.10.10.1",
DNSServers: []string{"8.8.8.8", "8.8.4.4"},
},
{
Type: "ethernet",
Name: "eth1",
MacAddress: "b4:87:18:bf:a3:60",
IPAddress: "196.168.100.124/24",
Gateway: "196.168.100.254",
@@ -363,6 +454,8 @@ func TestNetworkConfig_Render(t *testing.T) {
args: args{
nics: []NetworkConfigData{
{
Type: "ethernet",
Name: "eth0",
MacAddress: "92:60:a0:5b:22:c2",
IPAddress: "10.10.10.12/24",
IPV6Address: "2001:db8::1/64",
@@ -382,6 +475,8 @@ func TestNetworkConfig_Render(t *testing.T) {
args: args{
nics: []NetworkConfigData{
{
Type: "ethernet",
Name: "eth0",
MacAddress: "92:60:a0:5b:22:c2",
IPV6Address: "2001:db8::1/64",
Gateway6: "2001:db8::1",
@@ -399,6 +494,8 @@ func TestNetworkConfig_Render(t *testing.T) {
args: args{
nics: []NetworkConfigData{
{
Type: "ethernet",
Name: "eth0",
MacAddress: "92:60:a0:5b:22:c2",
DHCP4: true,
DHCP6: true,
@@ -416,6 +513,8 @@ func TestNetworkConfig_Render(t *testing.T) {
args: args{
nics: []NetworkConfigData{
{
Type: "ethernet",
Name: "eth0",
MacAddress: "92:60:a0:5b:22:c2",
DHCP4: true,
DHCP6: false,
@@ -433,6 +532,8 @@ func TestNetworkConfig_Render(t *testing.T) {
args: args{
nics: []NetworkConfigData{
{
Type: "ethernet",
Name: "eth0",
MacAddress: "92:60:a0:5b:22:c2",
DHCP4: false,
DHCP6: true,
@@ -445,6 +546,94 @@ func TestNetworkConfig_Render(t *testing.T) {
err: nil,
},
},
"ValidNetworkConfigMultipleNicsVRF": {
reason: "valid config multiple nics enslaved to VRF",
args: args{
nics: []NetworkConfigData{
{
Type: "ethernet",
Name: "eth0",
MacAddress: "92:60:a0:5b:22:c2",
IPAddress: "10.10.10.12/24",
Gateway: "10.10.10.1",
DNSServers: []string{"8.8.8.8", "8.8.4.4"},
},
{
Type: "ethernet",
Name: "eth1",
MacAddress: "b4:87:18:bf:a3:60",
IPAddress: "196.168.100.124/24",
Gateway: "196.168.100.254",
DNSServers: []string{"8.8.8.8", "8.8.4.4"},
},
{
Type: "vrf",
Name: "vrf-blue",
Table: 500,
Interfaces: []string{"eth0", "eth1"},
Routes: []RoutingData{{
To: "default",
Via: "192.168.178.1",
Metric: 100,
Table: 100,
}, {
To: "10.10.10.0/24",
Via: "192.168.178.254",
Metric: 100,
}},
FIBRules: []FIBRuleData{{
To: "0.0.0.0/0",
From: "192.168.178.1/24",
Priority: 999,
Table: 100,
}},
},
},
},
want: want{
network: expectedValidNetworkConfigMultipleNicsVRF,
err: nil,
},
},
"ValidNetworkConfigValidFIBRule": {
reason: "valid config valid routing policy",
args: args{
nics: []NetworkConfigData{
{
Type: "vrf",
Name: "vrf-blue",
Table: 500,
FIBRules: []FIBRuleData{{
From: "10.10.0.0/16",
}},
},
},
},
want: want{
network: expectedValidNetworkConfigValidFIBRule,
err: nil,
},
},
"InvalidNetworkConfigMalformedFIBRule": {
reason: "invalid config malformed routing policy",
args: args{
nics: []NetworkConfigData{
{
Type: "vrf",
Name: "vrf-blue",
Table: 500,
Interfaces: []string{"eth0", "eth1"},
Routes: []RoutingData{{
Table: 100,
}},
},
},
},
want: want{
network: "",
err: ErrMalformedRoute,
},
},
}
for n, tc := range cases {

View File

@@ -26,6 +26,7 @@ type BaseCloudInitData struct {
// NetworkConfigData is used to render network-config.
type NetworkConfigData struct {
ProxName string // Device name in Proxmox
MacAddress string
DHCP4 bool
DHCP6 bool
@@ -34,4 +35,26 @@ type NetworkConfigData struct {
Gateway string
Gateway6 string
DNSServers []string
Type string
Name string
Interfaces []string // Interfaces controlled by this one.
Table uint32 // linux routing table number for VRF.
Routes []RoutingData
FIBRules []FIBRuleData // Forwarding information block for routing.
}
// RoutingData stores routing configuration.
// It describes a single route as rendered into the "routes" list of the
// network-config template.
type RoutingData struct {
	// To is the destination; validation accepts "default", an IP address or an IP prefix.
	To string
	// Via is the optional gateway; when set, validation requires a plain IP address.
	Via string
	// Metric is the route metric; it is only rendered when non-zero.
	Metric uint32
	// Table is the routing table number; it is only rendered when non-zero.
	Table uint32
}
// FIBRuleData stores forward information base rules (routing policies).
// It describes a single rule as rendered into the "routing-policy" list of the
// network-config template. Validation requires at least one of To and From.
type FIBRuleData struct {
	// To is the destination selector; when set it must be an IP address or an IP prefix.
	To string
	// From is the source selector; when set it must be an IP address or an IP prefix.
	From string
	// Priority is the rule priority; it is only rendered when non-zero.
	Priority uint32
	// Table is the routing table number; it is only rendered when non-zero.
	// Validation treats it as mandatory unless the rule belongs to a VRF.
	Table uint32
}

View File

@@ -0,0 +1,599 @@
apiVersion: ipam.cluster.x-k8s.io/v1alpha2
kind: GlobalInClusterIPPool
metadata:
name: shared-int-service-v4-inclusterippool
spec:
addresses: ${SECONDARY_IP_RANGES}
prefix: ${SECONDARY_IP_PREFIX}
# invalid gateway because IPAM requires a gateway
gateway: 169.254.0.254
---
apiVersion: ipam.cluster.x-k8s.io/v1alpha2
kind: GlobalInClusterIPPool
metadata:
name: shared-ext-service-v4-inclusterippool
spec:
addresses: ${LB_BGP_IPV4_RANGES}
prefix: ${LB_BGP_IPV4_PREFIX}
# invalid gateway because IPAM requires a gateway
gateway: 169.254.0.254
---
apiVersion: ipam.cluster.x-k8s.io/v1alpha2
kind: GlobalInClusterIPPool
metadata:
name: shared-ext-service-v6-inclusterippool
spec:
addresses: ${LB_BGP_IPV6_RANGE}
prefix: ${LB_BGP_IPV6_PREFIX}
# invalid gateway because IPAM requires a gateway
gateway: ::ffff:a9:fe:0:fe
---
apiVersion: cluster.x-k8s.io/v1beta1
kind: Cluster
metadata:
labels:
cluster.x-k8s.io/cluster-name: '${CLUSTER_NAME}'
name: "${CLUSTER_NAME}"
spec:
clusterNetwork:
pods:
cidrBlocks: ["192.168.0.0/16"]
infrastructureRef:
apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1
kind: ProxmoxCluster
name: "${CLUSTER_NAME}"
controlPlaneRef:
kind: KubeadmControlPlane
apiVersion: controlplane.cluster.x-k8s.io/v1beta1
name: "${CLUSTER_NAME}-control-plane"
---
apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1
kind: ProxmoxCluster
metadata:
name: "${CLUSTER_NAME}"
labels:
cluster.x-k8s.io/cluster-name: "${CLUSTER_NAME}"
spec:
controlPlaneEndpoint:
host: ${CONTROL_PLANE_ENDPOINT_IP}
port: 6443
ipv4Config:
addresses: ${NODE_IP_RANGES}
prefix: ${IP_PREFIX}
gateway: ${GATEWAY}
ipv6Config:
addresses: ${NODE_IPV6_RANGES}
prefix: ${IPV6_PREFIX:=64}
gateway: ${IPV6_GATEWAY}
dnsServers: ${DNS_SERVERS}
allowedNodes: ${ALLOWED_NODES:=[]}
---
kind: KubeadmControlPlane
apiVersion: controlplane.cluster.x-k8s.io/v1beta1
metadata:
name: "${CLUSTER_NAME}-control-plane"
spec:
replicas: ${CONTROL_PLANE_MACHINE_COUNT}
machineTemplate:
infrastructureRef:
kind: ProxmoxMachineTemplate
apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1
name: "${CLUSTER_NAME}-control-plane"
kubeadmConfigSpec:
users:
- name: root
sshAuthorizedKeys: [${VM_SSH_KEYS}]
files:
- content: |
apiVersion: v1
kind: Pod
metadata:
creationTimestamp: null
name: kube-vip
namespace: kube-system
spec:
containers:
- args:
- manager
env:
- name: cp_enable
value: "true"
- name: vip_interface
value: ${VIP_NETWORK_INTERFACE=""}
- name: address
value: ${CONTROL_PLANE_ENDPOINT_IP}
- name: port
value: "6443"
- name: vip_arp
value: "true"
- name: vip_leaderelection
value: "true"
- name: vip_leaseduration
value: "15"
- name: vip_renewdeadline
value: "10"
- name: vip_retryperiod
value: "2"
image: ghcr.io/kube-vip/kube-vip:v0.5.11
imagePullPolicy: IfNotPresent
name: kube-vip
resources: {}
securityContext:
capabilities:
add:
- NET_ADMIN
- NET_RAW
volumeMounts:
- mountPath: /etc/kubernetes/admin.conf
name: kubeconfig
hostAliases:
- hostnames:
- kubernetes
ip: 127.0.0.1
hostNetwork: true
volumes:
- hostPath:
path: /etc/kubernetes/admin.conf
type: FileOrCreate
name: kubeconfig
status: {}
owner: root:root
path: /etc/kubernetes/manifests/kube-vip.yaml
# Helper script run from preKubeadmCommands: appends a KubeProxyConfiguration
# (ipvs mode with strictARP) to the kubeadm configuration.
- content: |
    #!/bin/sh
    cat >> /run/kubeadm/kubeadm.yaml <<EOF
    ---
    apiVersion: kubeproxy.config.k8s.io/v1alpha1
    kind: KubeProxyConfiguration
    mode: "ipvs"
    ipvs:
      strictARP: true
    EOF
  owner: root:root
  permissions: "0755"
  path: /tmp/kube-proxy.sh
initConfiguration:
nodeRegistration:
kubeletExtraArgs:
provider-id: "proxmox://'{{ ds.meta_data.instance_id }}'"
joinConfiguration:
nodeRegistration:
kubeletExtraArgs:
provider-id: "proxmox://'{{ ds.meta_data.instance_id }}'"
preKubeadmCommands:
- /tmp/kube-proxy.sh
version: "${KUBERNETES_VERSION}"
---
kind: ProxmoxMachineTemplate
apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1
metadata:
name: "${CLUSTER_NAME}-control-plane"
spec:
template:
spec:
sourceNode: "${PROXMOX_SOURCENODE}"
templateID: ${TEMPLATE_VMID}
format: "qcow2"
full: true
numSockets: ${NUM_SOCKETS:=2}
numCores: ${NUM_CORES:=4}
memoryMiB: ${MEMORY_MIB:=16384}
disks:
bootVolume:
disk: ${BOOT_VOLUME_DEVICE:=scsi0}
sizeGb: ${BOOT_VOLUME_SIZE:=100}
network:
default:
bridge: ${BRIDGE}
model: virtio
additionalDevices:
- name: net1
bridge: ${SECONDARY_BRIDGE}
model: virtio
ipv4PoolRef:
apiGroup: ipam.cluster.x-k8s.io
kind: GlobalInClusterIPPool
name: shared-int-service-v4-inclusterippool
dnsServers: ${DNS_SERVERS}
---
apiVersion: cluster.x-k8s.io/v1beta1
kind: MachineDeployment
metadata:
labels:
cluster.x-k8s.io/cluster-name: '${CLUSTER_NAME}'
name: "${CLUSTER_NAME}-workers"
spec:
clusterName: "${CLUSTER_NAME}"
replicas: ${WORKER_MACHINE_COUNT}
selector:
matchLabels:
template:
metadata:
labels:
cluster.x-k8s.io/cluster-name: '${CLUSTER_NAME}'
node-role.kubernetes.io/node: ""
spec:
clusterName: "${CLUSTER_NAME}"
version: "${KUBERNETES_VERSION}"
bootstrap:
configRef:
name: "${CLUSTER_NAME}-worker"
apiVersion: bootstrap.cluster.x-k8s.io/v1beta1
kind: KubeadmConfigTemplate
infrastructureRef:
name: "${CLUSTER_NAME}-worker"
apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1
kind: ProxmoxMachineTemplate
---
apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1
kind: ProxmoxMachineTemplate
metadata:
name: "${CLUSTER_NAME}-worker"
spec:
template:
spec:
sourceNode: "${PROXMOX_SOURCENODE}"
templateID: ${TEMPLATE_VMID}
format: "qcow2"
full: true
numSockets: ${NUM_SOCKETS:=2}
numCores: ${NUM_CORES:=4}
memoryMiB: ${MEMORY_MIB:=16384}
disks:
bootVolume:
disk: ${BOOT_VOLUME_DEVICE:=scsi0}
sizeGb: ${BOOT_VOLUME_SIZE:=100}
network:
default:
bridge: ${BRIDGE}
model: virtio
additionalDevices:
- name: net1
bridge: ${SECONDARY_BRIDGE}
model: virtio
ipv4PoolRef:
apiGroup: ipam.cluster.x-k8s.io
kind: GlobalInClusterIPPool
name: shared-int-service-v4-inclusterippool
dnsServers: ${DNS_SERVERS}
---
apiVersion: cluster.x-k8s.io/v1beta1
kind: MachineDeployment
metadata:
name: "${CLUSTER_NAME}-load-balancers"
namespace: default
spec:
clusterName: "${CLUSTER_NAME}"
replicas: ${LOAD_BALANCER_MACHINE_COUNT}
selector:
matchLabels:
template:
metadata:
labels:
node-role.kubernetes.io/node: ""
node-role.kubernetes.io/load-balancer: ""
spec:
clusterName: "${CLUSTER_NAME}"
version: "${KUBERNETES_VERSION}"
bootstrap:
configRef:
name: "${CLUSTER_NAME}-load-balancer"
apiVersion: bootstrap.cluster.x-k8s.io/v1beta1
kind: KubeadmConfigTemplate
infrastructureRef:
name: "${CLUSTER_NAME}-load-balancer"
apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1
kind: ProxmoxMachineTemplate
---
apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1
kind: ProxmoxMachineTemplate
metadata:
name: "${CLUSTER_NAME}-load-balancer"
spec:
template:
spec:
sourceNode: "${PROXMOX_SOURCENODE}"
templateID: ${TEMPLATE_VMID}
format: "qcow2"
full: true
numSockets: ${NUM_SOCKETS_LB:=1}
numCores: ${NUM_CORES_LB:=4}
memoryMiB: ${MEMORY_MIB_LB:=2048}
disks:
bootVolume:
disk: ${BOOT_VOLUME_DEVICE:=scsi0}
sizeGb: ${BOOT_VOLUME_SIZE:=100}
network:
default:
bridge: ${BRIDGE}
model: virtio
additionalDevices:
- name: net1
bridge: ${SECONDARY_BRIDGE}
model: virtio
ipv4PoolRef:
apiGroup: ipam.cluster.x-k8s.io
kind: GlobalInClusterIPPool
name: shared-int-service-v4-inclusterippool
dnsServers: ${DNS_SERVERS}
- name: net2
bridge: ${EXT_SERVICE_BRIDGE}
model: virtio
ipv4PoolRef:
apiGroup: ipam.cluster.x-k8s.io
kind: GlobalInClusterIPPool
name: shared-ext-service-v4-inclusterippool
ipv6PoolRef:
apiGroup: ipam.cluster.x-k8s.io
kind: GlobalInClusterIPPool
name: shared-ext-service-v6-inclusterippool
dnsServers: ${DNS_SERVERS}
vrfs:
- name: vrf-ext
table: 500
interfaces:
- net2
routingPolicy:
- from: "${METALLB_IPV4_RANGE}"
- from: "${METALLB_IPV6_RANGE}"
---
apiVersion: bootstrap.cluster.x-k8s.io/v1beta1
kind: KubeadmConfigTemplate
metadata:
name: "${CLUSTER_NAME}-worker"
spec:
template:
spec:
users:
- name: root
sshAuthorizedKeys: [${VM_SSH_KEYS}]
joinConfiguration:
nodeRegistration:
kubeletExtraArgs:
provider-id: "proxmox://'{{ ds.meta_data.instance_id }}'"
---
apiVersion: bootstrap.cluster.x-k8s.io/v1beta1
kind: KubeadmConfigTemplate
metadata:
name: "${CLUSTER_NAME}-load-balancer"
namespace: default
spec:
template:
spec:
users:
- name: root
sshAuthorizedKeys: [${VM_SSH_KEYS}]
joinConfiguration:
nodeRegistration:
kubeletExtraArgs:
provider-id: "proxmox://'{{ ds.meta_data.instance_id }}'"
taints:
- effect: NoSchedule
key: node-role.kubernetes.io/load-balancer
value: ""
---
apiVersion: addons.cluster.x-k8s.io/v1beta1
kind: ClusterResourceSet
metadata:
labels:
cluster.x-k8s.io/cluster-name: '${CLUSTER_NAME}'
name: ${CLUSTER_NAME}-crs-0
spec:
clusterSelector:
matchLabels:
cluster.x-k8s.io/cluster-name: '${CLUSTER_NAME}'
resources:
- kind: ConfigMap
name: cilium
- kind: ConfigMap
name: metallb-namespace
- kind: ConfigMap
name: metallb
- kind: ConfigMap
name: "metallb-${CLUSTER_NAME}"
strategy: Reconcile
---
# ConfigMap holding the Namespace manifest for metallb-system.
# The kubernetes.io/metadata.name label must carry the namespace's own name.
apiVersion: v1
data:
  metallb-namespace: |
    apiVersion: v1
    kind: Namespace
    metadata:
      name: metallb-system
      labels:
        kubernetes.io/metadata.name: metallb-system
    spec:
      finalizers:
      - kubernetes
kind: ConfigMap
metadata:
  name: metallb-namespace
  namespace: default
---
apiVersion: v1
kind: ConfigMap
metadata:
name: "metallb-${CLUSTER_NAME}"
namespace: default
data:
metallb-config: |
apiVersion: metallb.io/v1beta1
kind: BGPAdvertisement
metadata:
namespace: metallb-system
name: ext-service
spec:
aggregationLength: 32
aggregationLengthV6: 128
ipAddressPools:
- ext-service-v4-pool
- ext-service-v6-pool
---
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
namespace: metallb-system
name: ext-service-v4-pool
spec:
addresses: ["${METALLB_IPV4_RANGE}"]
autoAssign: true
avoidBuggyIPs: false
---
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
namespace: metallb-system
name: ext-service-v6-pool
spec:
addresses: ["${METALLB_IPV6_RANGE}"]
autoAssign: true
avoidBuggyIPs: false
---
apiVersion: metallb.io/v1beta2
kind: BGPPeer
metadata:
name: ext-switch1
namespace: metallb-system
spec:
myASN: ${METALLB_IPV4_ASN}
peerASN: ${METALLB_IPV4_BGP_PEER_ASN}
peerAddress: ${METALLB_IPV4_BGP_PEER}
peerPort: 179
password: ${METALLB_IPV4_BGP_SECRET}
vrf: vrf-ext
---
apiVersion: metallb.io/v1beta2
kind: BGPPeer
metadata:
name: ext-switch1-v6
namespace: metallb-system
spec:
myASN: ${METALLB_IPV6_ASN}
peerASN: ${METALLB_IPV6_BGP_PEER_ASN}
peerAddress: ${METALLB_IPV6_BGP_PEER}
peerPort: 179
password: ${METALLB_IPV6_BGP_SECRET}
vrf: vrf-ext
---
apiVersion: metallb.io/v1beta2
kind: BGPPeer
metadata:
name: ext-switch2
namespace: metallb-system
spec:
myASN: ${METALLB_IPV4_ASN}
peerASN: ${METALLB_IPV4_BGP_PEER2_ASN}
peerAddress: ${METALLB_IPV4_BGP_PEER2}
peerPort: 179
password: ${METALLB_IPV4_BGP_SECRET}
vrf: vrf-ext
---
apiVersion: metallb.io/v1beta2
kind: BGPPeer
metadata:
name: ext-switch2-v6
namespace: metallb-system
spec:
myASN: ${METALLB_IPV6_ASN}
peerASN: ${METALLB_IPV6_BGP_PEER2_ASN}
peerAddress: ${METALLB_IPV6_BGP_PEER2}
peerPort: 179
password: ${METALLB_IPV6_BGP_SECRET}
vrf: vrf-ext
---
apiVersion: frrk8s.metallb.io/v1beta1
kind: FRRConfiguration
metadata:
namespace: metallb-system
name: frr-route-import
spec:
nodeSelector:
matchLabels:
node-role.kubernetes.io/load-balancer: ""
bgp:
routers:
- asn: ${METALLB_IPV4_ASN}
vrf: vrf-ext
neighbors:
- address: ${METALLB_IPV4_BGP_PEER}
asn: ${METALLB_IPV4_BGP_PEER_ASN}
holdTime: 1m30s
keepaliveTime: 30s
password: ${METALLB_IPV4_BGP_SECRET}
toReceive:
allowed:
prefixes:
- prefix: 0.0.0.0/0
- asn: ${METALLB_IPV6_ASN}
vrf: vrf-ext
neighbors:
- address: ${METALLB_IPV6_BGP_PEER}
asn: ${METALLB_IPV6_BGP_PEER_ASN}
holdTime: 1m30s
keepaliveTime: 30s
password: ${METALLB_IPV6_BGP_SECRET}
toReceive:
allowed:
prefixes:
- prefix: ::0/0
- asn: ${METALLB_IPV4_ASN}
vrf: vrf-ext
neighbors:
- address: ${METALLB_IPV4_BGP_PEER2}
asn: ${METALLB_IPV4_BGP_PEER2_ASN}
holdTime: 1m30s
keepaliveTime: 30s
password: ${METALLB_IPV4_BGP_SECRET}
toReceive:
allowed:
prefixes:
- prefix: 0.0.0.0/0
- asn: ${METALLB_IPV6_ASN}
vrf: vrf-ext
neighbors:
- address: ${METALLB_IPV6_BGP_PEER2}
asn: ${METALLB_IPV6_BGP_PEER2_ASN}
holdTime: 1m30s
keepaliveTime: 30s
password: ${METALLB_IPV6_BGP_SECRET}
toReceive:
allowed:
prefixes:
- prefix: ::0/0
---
apiVersion: cluster.x-k8s.io/v1beta1
kind: MachineHealthCheck
metadata:
name: "${CLUSTER_NAME}-node-unhealthy-5m"
spec:
clusterName: "${CLUSTER_NAME}"
maxUnhealthy: 40%
nodeStartupTimeout: 15m
selector:
matchLabels:
node-role.kubernetes.io/node: ""
unhealthyConditions:
- type: Ready
status: Unknown
timeout: 300s
- type: Ready
status: "False"
timeout: 300s
---
apiVersion: cluster.x-k8s.io/v1beta1
kind: MachineHealthCheck
metadata:
name: "${CLUSTER_NAME}-control-plane-unhealthy-5m"
spec:
clusterName: "${CLUSTER_NAME}"
maxUnhealthy: 100%
nodeStartupTimeout: 15m
selector:
matchLabels:
cluster.x-k8s.io/control-plane: ""
unhealthyConditions:
- type: Ready
status: Unknown
timeout: 300s
- type: Ready
status: "False"
timeout: 300s