Add azure ccm/csi/autoscaler

This commit is contained in:
Serge Logvinov
2022-05-25 13:39:36 +03:00
parent 69d128a1ea
commit ec8806c4c5
20 changed files with 1299 additions and 24 deletions

1
azure/.gitignore vendored
View File

@@ -1,2 +1,3 @@
_cfgs/
*.yaml
azure.json

View File

@@ -0,0 +1,202 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: azure-cloud-controller-manager
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: system:cloud-controller-manager
annotations:
rbac.authorization.kubernetes.io/autoupdate: "true"
labels:
k8s-app: azure-cloud-controller-manager
rules:
- apiGroups:
- ""
resources:
- events
verbs:
- create
- patch
- update
- apiGroups:
- ""
resources:
- nodes
verbs:
- "*"
- apiGroups:
- ""
resources:
- nodes/status
verbs:
- patch
- apiGroups:
- ""
resources:
- services
verbs:
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
- services/status
verbs:
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
- serviceaccounts
verbs:
- create
- get
- list
- watch
- update
- apiGroups:
- ""
resources:
- persistentvolumes
verbs:
- get
- list
- update
- watch
- apiGroups:
- ""
resources:
- endpoints
verbs:
- create
- get
- list
- watch
- update
- apiGroups:
- ""
resources:
- secrets
verbs:
- get
- list
- watch
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs:
- get
- create
- update
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: system:cloud-controller-manager
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:cloud-controller-manager
subjects:
- kind: ServiceAccount
name: azure-cloud-controller-manager
namespace: kube-system
- kind: User
name: azure-cloud-controller-manager
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: system:cloud-controller-manager:extension-apiserver-authentication-reader
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
name: azure-cloud-controller-manager
namespace: kube-system
- apiGroup: ""
kind: User
name: azure-cloud-controller-manager
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: azure-cloud-controller-manager
namespace: kube-system
labels:
component: azure-cloud-controller-manager
spec:
replicas: 1
selector:
matchLabels:
tier: control-plane
component: azure-cloud-controller-manager
template:
metadata:
labels:
tier: control-plane
component: azure-cloud-controller-manager
spec:
priorityClassName: system-node-critical
hostNetwork: true
serviceAccountName: azure-cloud-controller-manager
nodeSelector:
node-role.kubernetes.io/master: ""
tolerations:
- effect: NoSchedule
key: node-role.kubernetes.io/master
operator: Exists
- effect: NoSchedule
key: node-role.kubernetes.io/control-plane
operator: Exists
containers:
- name: azure-cloud-controller-manager
image: mcr.microsoft.com/oss/kubernetes/azure-cloud-controller-manager:v1.24.0
imagePullPolicy: IfNotPresent
command: ["cloud-controller-manager"]
args:
- --v=2
- --cluster-name=$(CLUSTER_NAME)
- --cloud-config=/etc/azure/azure.json
- --cloud-provider=azure
- --allocate-node-cidrs=false
- --controllers=cloud-node-lifecycle # disable cloud-node controller
- --use-service-account-credentials
- --bind-address=127.0.0.1
- --port=10267
env:
- name: CLUSTER_NAME
value: kubernetes
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: "1"
memory: 512Mi
livenessProbe:
httpGet:
path: /healthz
port: 10267
initialDelaySeconds: 20
periodSeconds: 10
timeoutSeconds: 5
volumeMounts:
- name: cloud-config
mountPath: /etc/azure
readOnly: true
volumes:
- name: cloud-config
secret:
secretName: azure-cloud-controller-manager

View File

@@ -0,0 +1,100 @@
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
k8s-app: cloud-node-manager
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
name: cloud-node-manager
namespace: kube-system
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: cloud-node-manager
labels:
k8s-app: cloud-node-manager
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
rules:
- apiGroups: [""]
resources: ["nodes"]
verbs: ["watch","list","get","update", "patch"]
- apiGroups: [""]
resources: ["nodes/status"]
verbs: ["patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: cloud-node-manager
labels:
k8s-app: cloud-node-manager
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cloud-node-manager
subjects:
- kind: ServiceAccount
name: cloud-node-manager
namespace: kube-system
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: cloud-node-manager
namespace: kube-system
labels:
component: cloud-node-manager
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
spec:
selector:
matchLabels:
k8s-app: cloud-node-manager
template:
metadata:
labels:
k8s-app: cloud-node-manager
annotations:
cluster-autoscaler.kubernetes.io/daemonset-pod: "true"
spec:
priorityClassName: system-node-critical
serviceAccountName: cloud-node-manager
hostNetwork: true # required to fetch correct hostname
nodeSelector:
kubernetes.io/os: linux
tolerations:
- key: CriticalAddonsOnly
operator: Exists
- key: node-role.kubernetes.io/master
operator: Equal
value: "true"
effect: NoSchedule
- operator: "Exists"
effect: NoExecute
- operator: "Exists"
effect: NoSchedule
containers:
- name: cloud-node-manager
image: mcr.microsoft.com/oss/kubernetes/azure-cloud-node-manager:v1.24.0
imagePullPolicy: IfNotPresent
command:
- cloud-node-manager
- --node-name=$(NODE_NAME)
- --wait-routes=false
- --v=4
env:
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
resources:
requests:
cpu: 50m
memory: 32Mi
limits:
cpu: 100m
memory: 64Mi

View File

@@ -0,0 +1,202 @@
---
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
name: cluster-autoscaler
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: cluster-autoscaler
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
rules:
- apiGroups: [""]
resources: ["events", "endpoints"]
verbs: ["create", "patch"]
- apiGroups: [""]
resources: ["pods/eviction"]
verbs: ["create"]
- apiGroups: [""]
resources: ["pods/status"]
verbs: ["update"]
- apiGroups: [""]
resources: ["endpoints"]
resourceNames: ["cluster-autoscaler"]
verbs: ["get", "update"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["watch", "list", "get", "update"]
- apiGroups: [""]
resources:
- "namespaces"
- "pods"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
- "persistentvolumes"
verbs: ["watch", "list", "get"]
- apiGroups: ["extensions"]
resources: ["replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["policy"]
resources: ["poddisruptionbudgets"]
verbs: ["watch", "list"]
- apiGroups: ["apps"]
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]
verbs: ["watch", "list", "get"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create"]
- apiGroups: ["coordination.k8s.io"]
resourceNames: ["cluster-autoscaler"]
resources: ["leases"]
verbs: ["get", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: cluster-autoscaler
namespace: kube-system
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
rules:
- apiGroups: [""]
resources: ["configmaps"]
verbs: ["create","list","watch"]
- apiGroups: [""]
resources: ["configmaps"]
resourceNames:
- "cluster-autoscaler-status"
- "cluster-autoscaler-priority-expander"
verbs: ["delete", "get", "update", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: cluster-autoscaler
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-autoscaler
subjects:
- kind: ServiceAccount
name: cluster-autoscaler
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: cluster-autoscaler
namespace: kube-system
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: cluster-autoscaler
subjects:
- kind: ServiceAccount
name: cluster-autoscaler
namespace: kube-system
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: cluster-autoscaler
name: cluster-autoscaler
namespace: kube-system
spec:
replicas: 1
selector:
matchLabels:
app: cluster-autoscaler
template:
metadata:
labels:
app: cluster-autoscaler
spec:
nodeSelector:
kubernetes.io/os: linux
node-role.kubernetes.io/master: ""
tolerations:
- effect: NoSchedule
key: node-role.kubernetes.io/master
operator: Exists
- effect: NoSchedule
key: node-role.kubernetes.io/control-plane
operator: Exists
serviceAccountName: cluster-autoscaler
containers:
- image: k8s.gcr.io/autoscaling/cluster-autoscaler:v1.24.0
imagePullPolicy: Always
name: cluster-autoscaler
resources:
limits:
cpu: 100m
memory: 300Mi
requests:
cpu: 100m
memory: 300Mi
command:
- ./cluster-autoscaler
- --v=3
- --logtostderr=true
- --cloud-provider=azure
# - --regional
- --skip-nodes-with-local-storage=false
- --ignore-daemonsets-utilization
# - --nodes=0:3:web-uksouth
- --node-group-auto-discovery=label:cluster-autoscaler-enabled=true,cluster-autoscaler-name=talos-uksouth
env:
- name: ARM_SUBSCRIPTION_ID
valueFrom:
secretKeyRef:
key: SubscriptionID
name: cluster-autoscaler-azure
- name: ARM_RESOURCE_GROUP
valueFrom:
secretKeyRef:
key: ResourceGroup
name: cluster-autoscaler-azure
- name: ARM_TENANT_ID
valueFrom:
secretKeyRef:
key: TenantID
name: cluster-autoscaler-azure
- name: ARM_CLIENT_ID
valueFrom:
secretKeyRef:
key: ClientID
name: cluster-autoscaler-azure
- name: ARM_CLIENT_SECRET
valueFrom:
secretKeyRef:
key: ClientSecret
name: cluster-autoscaler-azure
- name: ARM_VM_TYPE
valueFrom:
secretKeyRef:
key: VMType
name: cluster-autoscaler-azure
restartPolicy: Always

View File

@@ -0,0 +1,199 @@
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: csi-azuredisk-node-sa
namespace: kube-system
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: csi-azuredisk-node-role
rules:
- apiGroups: [""]
resources: ["secrets"]
verbs: ["get", "list"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: csi-azuredisk-node-secret-binding
subjects:
- kind: ServiceAccount
name: csi-azuredisk-node-sa
namespace: kube-system
roleRef:
kind: ClusterRole
name: csi-azuredisk-node-role
apiGroup: rbac.authorization.k8s.io
---
kind: DaemonSet
apiVersion: apps/v1
metadata:
name: csi-azuredisk-node
namespace: kube-system
spec:
updateStrategy:
rollingUpdate:
maxUnavailable: 1
type: RollingUpdate
selector:
matchLabels:
app: csi-azuredisk-node
template:
metadata:
labels:
app: csi-azuredisk-node
spec:
hostNetwork: true
dnsPolicy: Default
serviceAccountName: csi-azuredisk-node-sa
nodeSelector:
kubernetes.io/os: linux
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: type
operator: NotIn
values:
- virtual-kubelet
priorityClassName: system-node-critical
tolerations:
- operator: "Exists"
containers:
- name: liveness-probe
volumeMounts:
- mountPath: /csi
name: socket-dir
image: mcr.microsoft.com/oss/kubernetes-csi/livenessprobe:v2.6.0
args:
- --csi-address=/csi/csi.sock
- --probe-timeout=3s
- --health-port=29603
- --v=2
resources:
limits:
memory: 100Mi
requests:
cpu: 10m
memory: 20Mi
- name: node-driver-registrar
image: mcr.microsoft.com/oss/kubernetes-csi/csi-node-driver-registrar:v2.5.0
args:
- --csi-address=$(ADDRESS)
- --kubelet-registration-path=$(DRIVER_REG_SOCK_PATH)
- --v=2
livenessProbe:
exec:
command:
- /csi-node-driver-registrar
- --kubelet-registration-path=$(DRIVER_REG_SOCK_PATH)
- --mode=kubelet-registration-probe
initialDelaySeconds: 30
timeoutSeconds: 15
env:
- name: ADDRESS
value: /csi/csi.sock
- name: DRIVER_REG_SOCK_PATH
value: /var/lib/kubelet/plugins/disk.csi.azure.com/csi.sock
volumeMounts:
- name: socket-dir
mountPath: /csi
- name: registration-dir
mountPath: /registration
resources:
limits:
memory: 100Mi
requests:
cpu: 10m
memory: 20Mi
- name: azuredisk
image: mcr.microsoft.com/k8s/csi/azuredisk-csi:latest
imagePullPolicy: IfNotPresent
args:
- "--v=12"
- "--endpoint=$(CSI_ENDPOINT)"
- "--nodeid=$(KUBE_NODE_NAME)"
- "--metrics-address=0.0.0.0:29605"
- "--enable-perf-optimization=true"
- "--get-node-info-from-labels=false"
ports:
- containerPort: 29603
name: healthz
protocol: TCP
livenessProbe:
failureThreshold: 5
httpGet:
path: /healthz
port: healthz
initialDelaySeconds: 30
timeoutSeconds: 10
periodSeconds: 30
env:
- name: AZURE_CREDENTIAL_FILE
value: /etc/azure/azure.json
- name: CSI_ENDPOINT
value: unix:///csi/csi.sock
- name: KUBE_NODE_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: spec.nodeName
securityContext:
privileged: true
volumeMounts:
- name: cloud-config
mountPath: /etc/azure
readOnly: true
- mountPath: /csi
name: socket-dir
- mountPath: /var/lib/kubelet/
mountPropagation: Bidirectional
name: mountpoint-dir
- mountPath: /dev
name: device-dir
- mountPath: /sys/bus/scsi/devices
name: sys-devices-dir
- mountPath: /sys/class/
name: sys-class
resources:
limits:
memory: 200Mi
requests:
cpu: 10m
memory: 20Mi
volumes:
- name: cloud-config
secret:
secretName: azure-cloud-controller-manager
- hostPath:
path: /var/lib/kubelet/plugins/disk.csi.azure.com
type: DirectoryOrCreate
name: socket-dir
- hostPath:
path: /var/lib/kubelet/
type: DirectoryOrCreate
name: mountpoint-dir
- hostPath:
path: /var/lib/kubelet/plugins_registry/
type: DirectoryOrCreate
name: registration-dir
- hostPath:
path: /dev
type: Directory
name: device-dir
- hostPath:
path: /sys/bus/scsi/devices
type: Directory
name: sys-devices-dir
- hostPath:
path: /sys/class/
type: Directory
name: sys-class

View File

@@ -0,0 +1,387 @@
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: csi-azuredisk-controller-sa
namespace: kube-system
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: azuredisk-external-provisioner-role
rules:
- apiGroups: [""]
resources: ["persistentvolumes"]
verbs: ["get", "list", "watch", "create", "delete"]
- apiGroups: [""]
resources: ["persistentvolumeclaims"]
verbs: ["get", "list", "watch", "update"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["events"]
verbs: ["get", "list", "watch", "create", "update", "patch"]
- apiGroups: ["storage.k8s.io"]
resources: ["csinodes"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "list", "watch"]
- apiGroups: ["snapshot.storage.k8s.io"]
resources: ["volumesnapshots"]
verbs: ["get", "list"]
- apiGroups: ["snapshot.storage.k8s.io"]
resources: ["volumesnapshotcontents"]
verbs: ["get", "list"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["get", "watch", "list", "delete", "update", "create", "patch"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: azuredisk-csi-provisioner-binding
subjects:
- kind: ServiceAccount
name: csi-azuredisk-controller-sa
namespace: kube-system
roleRef:
kind: ClusterRole
name: azuredisk-external-provisioner-role
apiGroup: rbac.authorization.k8s.io
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: azuredisk-external-attacher-role
rules:
- apiGroups: [""]
resources: ["persistentvolumes"]
verbs: ["get", "list", "watch", "update"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "list", "watch"]
- apiGroups: ["csi.storage.k8s.io"]
resources: ["csinodeinfos"]
verbs: ["get", "list", "watch"]
- apiGroups: ["storage.k8s.io"]
resources: ["volumeattachments"]
verbs: ["get", "list", "watch", "update", "patch"]
- apiGroups: ["storage.k8s.io"]
resources: ["volumeattachments/status"]
verbs: ["get", "list", "watch", "update", "patch"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["get", "watch", "list", "delete", "update", "create", "patch"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: azuredisk-csi-attacher-binding
subjects:
- kind: ServiceAccount
name: csi-azuredisk-controller-sa
namespace: kube-system
roleRef:
kind: ClusterRole
name: azuredisk-external-attacher-role
apiGroup: rbac.authorization.k8s.io
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: azuredisk-external-snapshotter-role
rules:
- apiGroups: [""]
resources: ["events"]
verbs: ["list", "watch", "create", "update", "patch"]
- apiGroups: [""]
resources: ["secrets"]
verbs: ["get", "list"]
- apiGroups: ["snapshot.storage.k8s.io"]
resources: ["volumesnapshotclasses"]
verbs: ["get", "list", "watch"]
- apiGroups: ["snapshot.storage.k8s.io"]
resources: ["volumesnapshotcontents"]
verbs: ["create", "get", "list", "watch", "update", "delete", "patch"]
- apiGroups: ["snapshot.storage.k8s.io"]
resources: ["volumesnapshotcontents/status"]
verbs: ["update", "patch"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["get", "watch", "list", "delete", "update", "create", "patch"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: azuredisk-csi-snapshotter-binding
subjects:
- kind: ServiceAccount
name: csi-azuredisk-controller-sa
namespace: kube-system
roleRef:
kind: ClusterRole
name: azuredisk-external-snapshotter-role
apiGroup: rbac.authorization.k8s.io
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: azuredisk-external-resizer-role
rules:
- apiGroups: [""]
resources: ["persistentvolumes"]
verbs: ["get", "list", "watch", "update", "patch"]
- apiGroups: [""]
resources: ["persistentvolumeclaims"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["persistentvolumeclaims/status"]
verbs: ["update", "patch"]
- apiGroups: [""]
resources: ["events"]
verbs: ["list", "watch", "create", "update", "patch"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["get", "watch", "list", "delete", "update", "create", "patch"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "list", "watch"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: azuredisk-csi-resizer-role
subjects:
- kind: ServiceAccount
name: csi-azuredisk-controller-sa
namespace: kube-system
roleRef:
kind: ClusterRole
name: azuredisk-external-resizer-role
apiGroup: rbac.authorization.k8s.io
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: csi-azuredisk-controller-secret-role
rules:
- apiGroups: [""]
resources: ["secrets"]
verbs: ["get", "list"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: csi-azuredisk-controller-secret-binding
subjects:
- kind: ServiceAccount
name: csi-azuredisk-controller-sa
namespace: kube-system
roleRef:
kind: ClusterRole
name: csi-azuredisk-controller-secret-role
apiGroup: rbac.authorization.k8s.io
---
kind: Deployment
apiVersion: apps/v1
metadata:
name: csi-azuredisk-controller
namespace: kube-system
spec:
replicas: 1
selector:
matchLabels:
app: csi-azuredisk-controller
template:
metadata:
labels:
app: csi-azuredisk-controller
spec:
hostNetwork: true
serviceAccountName: csi-azuredisk-controller-sa
nodeSelector:
kubernetes.io/os: linux
priorityClassName: system-cluster-critical
tolerations:
- key: "node-role.kubernetes.io/master"
operator: "Exists"
effect: "NoSchedule"
- key: "node-role.kubernetes.io/controlplane"
operator: "Exists"
effect: "NoSchedule"
- key: "node-role.kubernetes.io/control-plane"
operator: "Exists"
effect: "NoSchedule"
containers:
- name: csi-provisioner
image: mcr.microsoft.com/oss/kubernetes-csi/csi-provisioner:v3.1.0
args:
- "--feature-gates=Topology=true"
- "--csi-address=$(ADDRESS)"
- "--v=2"
- "--timeout=15s"
- "--leader-election"
- "--leader-election-namespace=kube-system"
- "--worker-threads=40"
- "--extra-create-metadata=true"
- "--strict-topology=true"
env:
- name: ADDRESS
value: /csi/csi.sock
volumeMounts:
- mountPath: /csi
name: socket-dir
resources:
limits:
memory: 500Mi
requests:
cpu: 10m
memory: 20Mi
- name: csi-attacher
image: mcr.microsoft.com/oss/kubernetes-csi/csi-attacher:v3.4.0
args:
- "-v=2"
- "-csi-address=$(ADDRESS)"
- "-timeout=600s"
- "-leader-election"
- "--leader-election-namespace=kube-system"
- "-worker-threads=500"
env:
- name: ADDRESS
value: /csi/csi.sock
volumeMounts:
- mountPath: /csi
name: socket-dir
resources:
limits:
memory: 500Mi
requests:
cpu: 10m
memory: 20Mi
- name: csi-snapshotter
image: mcr.microsoft.com/oss/kubernetes-csi/csi-snapshotter:v5.0.1
args:
- "-csi-address=$(ADDRESS)"
- "-leader-election"
- "--leader-election-namespace=kube-system"
- "--v=2"
env:
- name: ADDRESS
value: /csi/csi.sock
volumeMounts:
- name: socket-dir
mountPath: /csi
resources:
limits:
memory: 100Mi
requests:
cpu: 10m
memory: 20Mi
- name: csi-resizer
image: mcr.microsoft.com/oss/kubernetes-csi/csi-resizer:v1.4.0
args:
- "-csi-address=$(ADDRESS)"
- "-v=2"
- "-leader-election"
- "--leader-election-namespace=kube-system"
- '-handle-volume-inuse-error=false'
- '-feature-gates=RecoverVolumeExpansionFailure=true'
- "-timeout=240s"
env:
- name: ADDRESS
value: /csi/csi.sock
volumeMounts:
- name: socket-dir
mountPath: /csi
resources:
limits:
memory: 500Mi
requests:
cpu: 10m
memory: 20Mi
- name: liveness-probe
image: mcr.microsoft.com/oss/kubernetes-csi/livenessprobe:v2.6.0
args:
- --csi-address=/csi/csi.sock
- --probe-timeout=3s
- --health-port=29602
- --v=2
volumeMounts:
- name: socket-dir
mountPath: /csi
resources:
limits:
memory: 100Mi
requests:
cpu: 10m
memory: 20Mi
- name: azuredisk
image: mcr.microsoft.com/k8s/csi/azuredisk-csi:latest
imagePullPolicy: IfNotPresent
args:
- "--v=5"
- "--endpoint=$(CSI_ENDPOINT)"
- "--metrics-address=0.0.0.0:29604"
- "--user-agent-suffix=OSS-kubectl"
- "--disable-avset-nodes=false"
- "--allow-empty-cloud-config=false"
ports:
- containerPort: 29602
name: healthz
protocol: TCP
- containerPort: 29604
name: metrics
protocol: TCP
livenessProbe:
failureThreshold: 5
httpGet:
path: /healthz
port: healthz
initialDelaySeconds: 30
timeoutSeconds: 10
periodSeconds: 30
env:
- name: AZURE_CREDENTIAL_FILE
value: /etc/azure/azure.json
- name: CSI_ENDPOINT
value: unix:///csi/csi.sock
volumeMounts:
- name: cloud-config
mountPath: /etc/azure
readOnly: true
- mountPath: /csi
name: socket-dir
resources:
limits:
memory: 500Mi
requests:
cpu: 10m
memory: 20Mi
volumes:
- name: cloud-config
secret:
secretName: azure-cloud-controller-manager
- name: socket-dir
emptyDir: {}
---
apiVersion: storage.k8s.io/v1
kind: CSIDriver
metadata:
name: disk.csi.azure.com
spec:
attachRequired: true
podInfoOnMount: false
fsGroupPolicy: File

View File

@@ -0,0 +1,62 @@
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
annotations:
storageclass.kubernetes.io/is-default-class: "false"
name: csi-azure-hdd-xfs
provisioner: kubernetes.io/azure-disk
parameters:
kind: Managed
cachingMode: ReadOnly
fsType: xfs
skuName: Standard_LRS
zoned: "true"
reclaimPolicy: Delete
volumeBindingMode: WaitForFirstConsumer
allowedTopologies:
- matchLabelExpressions:
- key: project.io/cloudprovider-type
values:
- azure
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
annotations:
storageclass.kubernetes.io/is-default-class: "false"
name: csi-azure-ssd-xfs
provisioner: kubernetes.io/azure-disk
parameters:
kind: Managed
cachingMode: ReadOnly
fsType: xfs
skuName: StandardSSD_LRS
zoned: "true"
reclaimPolicy: Delete
volumeBindingMode: WaitForFirstConsumer
allowedTopologies:
- matchLabelExpressions:
- key: project.io/cloudprovider-type
values:
- azure
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
annotations:
storageclass.kubernetes.io/is-default-class: "false"
name: csi-azure-premium-xfs
provisioner: kubernetes.io/azure-disk
parameters:
kind: Managed
cachingMode: ReadOnly
fsType: xfs
skuName: Premium_LRS # available values: Standard_LRS, Premium_LRS, StandardSSD_LRS and UltraSSD_LRS
zoned: "true"
reclaimPolicy: Delete
volumeBindingMode: WaitForFirstConsumer
allowedTopologies:
- matchLabelExpressions:
- key: project.io/cloudprovider-type
values:
- azure

View File

@@ -18,7 +18,6 @@ data:
hosts: |
# static hosts
169.254.2.53 dns.local
fd00::169:254:2:53 dns.local
Corefile.local: |
(empty) {
@@ -27,7 +26,7 @@ data:
.:53 {
errors
bind 169.254.2.53 fd00::169:254:2:53
bind 169.254.2.53
health 127.0.0.1:8091 {
lameduck 5s

View File

@@ -127,6 +127,9 @@ spec:
labels:
k8s-app: metrics-server
spec:
nodeSelector:
kubernetes.io/os: linux
node-role.kubernetes.io/master: ""
tolerations:
- key: "CriticalAddonsOnly"
operator: "Exists"
@@ -172,8 +175,6 @@ spec:
volumeMounts:
- mountPath: /tmp
name: tmp-dir
nodeSelector:
kubernetes.io/os: linux
priorityClassName: system-cluster-critical
serviceAccountName: metrics-server
volumes:

View File

@@ -0,0 +1,48 @@
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: overprovisioning
value: -1
globalDefault: false
description: "Priority class used by overprovisioning."
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: overprovisioning
namespace: default
spec:
replicas: 1
selector:
matchLabels:
run: overprovisioning
template:
metadata:
labels:
run: overprovisioning
spec:
nodeSelector:
project.io/node-pool: web
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node.kubernetes.io/instance-type
operator: Exists
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- topologyKey: kubernetes.io/hostname
labelSelector:
matchExpressions:
- key: run
operator: In
values:
- overprovisioning
priorityClassName: overprovisioning
containers:
- name: reserve-resources
image: k8s.gcr.io/pause
resources:
requests:
cpu: "700m"

View File

@@ -0,0 +1,42 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: statefulset-azuredisk
namespace: default
labels:
app: nginx
spec:
podManagementPolicy: Parallel # default is OrderedReady
serviceName: statefulset-azuredisk
replicas: 1
template:
metadata:
labels:
app: nginx
spec:
nodeSelector:
"kubernetes.io/os": linux
containers:
- name: statefulset-azuredisk
image: mcr.microsoft.com/oss/nginx/nginx:1.19.5
command:
- "/bin/bash"
- "-c"
- set -euo pipefail; while true; do echo $(date) >> /mnt/azuredisk/outfile; sleep 1; done
volumeMounts:
- name: persistent-storage
mountPath: /mnt/azuredisk
updateStrategy:
type: RollingUpdate
selector:
matchLabels:
app: nginx
volumeClaimTemplates:
- metadata:
name: persistent-storage
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 10Gi
storageClassName: csi-azure-ssd-xfs

View File

@@ -91,7 +91,7 @@ resource "azurerm_image" "talos" {
}
resource "azurerm_shared_image_version" "talos" {
name = "0.0.3"
name = "0.0.5"
location = var.regions[0]
resource_group_name = data.azurerm_resource_group.kubernetes.name
gallery_name = azurerm_shared_image.talos.gallery_name

View File

@@ -1,6 +1,6 @@
locals {
web_labels = "topology.kubernetes.io/zone=azure,project.io/node-pool=web"
web_labels = "project.io/cloudprovider-type=azure,project.io/node-pool=web"
}
resource "azurerm_linux_virtual_machine_scale_set" "web" {
@@ -63,11 +63,19 @@ resource "azurerm_linux_virtual_machine_scale_set" "web" {
# version = "latest"
# }
tags = merge(var.tags, { type = "web" })
tags = merge(var.tags, {
type = "web",
"cluster-autoscaler-enabled" = "true",
"cluster-autoscaler-name" = "${local.resource_group}-${lower(each.key)}",
"min" = 0,
"max" = 3,
"k8s.io_cluster-autoscaler_node-template_label_project.io_node-pool" = "web"
})
boot_diagnostics {}
lifecycle {
ignore_changes = [admin_username, admin_ssh_key, os_disk, source_image_id, tags]
ignore_changes = [instances, admin_username, admin_ssh_key, os_disk, source_image_id]
}
}

View File

@@ -1,6 +1,6 @@
locals {
worker_labels = "topology.kubernetes.io/zone=azure,project.io/node-pool=worker"
worker_labels = "project.io/cloudprovider-type=azure,project.io/node-pool=worker"
}
resource "azurerm_linux_virtual_machine_scale_set" "worker" {
@@ -16,6 +16,11 @@ resource "azurerm_linux_virtual_machine_scale_set" "worker" {
overprovision = false
# availability_set_id = var.instance_availability_set
# health_probe_id = ""
# automatic_instance_repair {
# enabled = true
# grace_period = "PT30M"
# }
network_interface {
name = "worker-${lower(each.key)}"
@@ -69,15 +74,18 @@ resource "azurerm_linux_virtual_machine_scale_set" "worker" {
# version = "latest"
# }
tags = merge(var.tags, { type = "worker" })
tags = merge(var.tags, {
type = "worker",
"cluster-autoscaler-enabled" = "true",
"cluster-autoscaler-name" = "${local.resource_group}-${lower(each.key)}",
"min" = 0,
"max" = 3,
# automatic_instance_repair {
# enabled = true
# grace_period = "PT30M"
# }
"k8s.io_cluster-autoscaler_node-template_label_project.io_node-pool" = "worker"
})
boot_diagnostics {}
lifecycle {
ignore_changes = [admin_username, admin_ssh_key, os_disk, source_image_id, tags]
ignore_changes = [instances, admin_username, admin_ssh_key, os_disk, source_image_id]
}
}

View File

@@ -72,7 +72,7 @@ resource "azurerm_network_interface_backend_address_pool_association" "controlpl
}
locals {
controlplane_labels = "topology.kubernetes.io/region=${var.region},topology.kubernetes.io/zone=azure"
controlplane_labels = "project.io/cloudprovider-type=azure,topology.kubernetes.io/region=${var.region},kubernetes.azure.com/managed=false"
}
resource "azurerm_linux_virtual_machine" "controlplane" {

View File

@@ -54,6 +54,7 @@ resource "azurerm_lb_rule" "web_http_v4" {
backend_port = 80
idle_timeout_in_minutes = 30
enable_tcp_reset = local.network_public[each.key].sku != "Basic"
disable_outbound_snat = local.network_public[each.key].sku != "Basic"
}
resource "azurerm_lb_rule" "web_https_v4" {
@@ -69,4 +70,18 @@ resource "azurerm_lb_rule" "web_https_v4" {
backend_port = 443
idle_timeout_in_minutes = 30
enable_tcp_reset = local.network_public[each.key].sku != "Basic"
disable_outbound_snat = local.network_public[each.key].sku != "Basic"
}
resource "azurerm_lb_outbound_rule" "web" {
for_each = { for idx, name in local.regions : name => idx if local.network_public[name].sku != "Basic" }
name = "snat"
loadbalancer_id = azurerm_lb.web[each.key].id
backend_address_pool_id = azurerm_lb_backend_address_pool.web_v4[each.key].id
protocol = "All"
allocated_outbound_ports = 1024
frontend_ip_configuration {
name = "web-lb-v4"
}
}

View File

@@ -63,7 +63,7 @@ variable "capabilities" {
default = {
"uksouth" = {
network_nat_enable = false,
network_lb_type = "Basic",
network_lb_type = "Basic", # Standard
network_gw_enable = false,
network_gw_type = "Standard_B1s",

View File

@@ -12,7 +12,6 @@ machine:
validSubnets: ${format("%#v",nodeSubnets)}
clusterDNS:
- 169.254.2.53
- fd00::169:254:2:53
- ${cidrhost(split(",",serviceSubnets)[0], 10)}
network:
hostname: "${name}"
@@ -23,7 +22,6 @@ machine:
- interface: dummy0
addresses:
- 169.254.2.53/32
- fd00::169:254:2:53/128
extraHostEntries:
- ip: ${lbv4}
aliases:
@@ -57,6 +55,11 @@ cluster:
externalCloudProvider:
enabled: true
manifests:
- https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/azure/deployments/azure-cloud-controller-manager.yaml
- https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/azure/deployments/azure-cloud-node-manager.yaml
- https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/azure/deployments/azure-csi-node.yaml
- https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/azure/deployments/azure-csi.yaml
- https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/azure/deployments/azure-storage.yaml
- https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/azure/deployments/kubelet-serving-cert-approver.yaml
- https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/azure/deployments/metrics-server.yaml
- https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/azure/deployments/local-path-storage.yaml

View File

@@ -15,14 +15,12 @@ machine:
validSubnets: ${format("%#v",nodeSubnets)}
clusterDNS:
- 169.254.2.53
- fd00::169:254:2:53
- ${cidrhost(split(",",serviceSubnets)[0], 10)}
network:
interfaces:
- interface: dummy0
addresses:
- 169.254.2.53/32
- fd00::169:254:2:53/128
extraHostEntries:
- ip: ${lbv4}
aliases:

View File

@@ -174,6 +174,6 @@ spec:
requests:
cpu: 100m
volumes:
- name: cloud-config
secret:
secretName: openstack-cloud-controller-manager
- name: cloud-config
secret:
secretName: openstack-cloud-controller-manager