oracle: add cluster autoscaler

This commit is contained in:
Serge Logvinov
2022-12-24 15:14:45 +02:00
parent cf6b119303
commit b646502fd6
16 changed files with 286 additions and 32 deletions

View File

@@ -12,7 +12,7 @@ The goal is to create all cloud services from scratch.
| [GCP](gcp-zonal) | 0.14.0 | CCM,CSI,Autoscaler | one region, many zones | ✓ |
| [Hetzner](hetzner) | 1.3.0 | CCM,CSI,Autoscaler | many regions | ✗ |
| [Openstack](openstack) | 1.3.0 | CCM,CSI | many regions, many zones | ✓ |
| [Oracle](oracle) | 1.3.0 | | many regions, many zones | ✓ |
| [Oracle](oracle) | 1.3.0 | CCM,~~CSI~~,Autoscaler | one region, many zones | ✓ |
| [Scaleway](scaleway) | 1.3.0 | CCM,CSI | one region | ✓ |

View File

@@ -137,21 +137,9 @@ spec:
app: cluster-autoscaler
spec:
nodeSelector:
kubernetes.io/os: linux
node-role.kubernetes.io/master: ""
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: project.io/cloudprovider-type
operator: In
values:
- azure
node-role.kubernetes.io/control-plane: ""
node.cloudprovider.kubernetes.io/platform: azure
tolerations:
- effect: NoSchedule
key: node-role.kubernetes.io/master
operator: Exists
- effect: NoSchedule
key: node-role.kubernetes.io/control-plane
operator: Exists
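Assembled from the hunk above, the Azure deployment's scheduling stanza after this change collapses to two plain node labels plus the control-plane toleration, with no affinity block:

      nodeSelector:
        node-role.kubernetes.io/control-plane: ""
        node.cloudprovider.kubernetes.io/platform: azure
      tolerations:
        - effect: NoSchedule
          key: node-role.kubernetes.io/control-plane
          operator: Exists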

View File

@@ -51,6 +51,6 @@ create-kubeconfig: ## Download kubeconfig
create-deployments:
helm template --namespace=kube-system --version=1.12.4 -f deployments/cilium.yaml cilium \
cilium/cilium > deployments/cilium_result.yaml
cilium/cilium > deployments/cilium-result.yaml
helm template --namespace=ingress-nginx --version=4.4.0 -f deployments/ingress.yaml ingress-nginx \
ingress-nginx/ingress-nginx > deployments/ingress_result.yaml
ingress-nginx/ingress-nginx > deployments/ingress-result.yaml

View File

@@ -0,0 +1,184 @@
---
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
name: cluster-autoscaler
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: cluster-autoscaler
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
rules:
- apiGroups: [""]
resources: ["events", "endpoints"]
verbs: ["create", "patch"]
- apiGroups: [""]
resources: ["pods/eviction"]
verbs: ["create"]
- apiGroups: [""]
resources: ["pods/status"]
verbs: ["update"]
- apiGroups: [""]
resources: ["endpoints"]
resourceNames: ["cluster-autoscaler"]
verbs: ["get", "update"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["watch", "list", "get", "update"]
- apiGroups: [""]
resources:
- "namespaces"
- "pods"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
- "persistentvolumes"
verbs: ["watch", "list", "get"]
- apiGroups: ["extensions"]
resources: ["replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["policy"]
resources: ["poddisruptionbudgets"]
verbs: ["watch", "list"]
- apiGroups: ["apps"]
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes", "csistoragecapacities", "csidrivers"]
verbs: ["watch", "list", "get"]
- apiGroups: ["batch", "extensions"]
resources: ["jobs"]
verbs: ["get", "list", "watch", "patch"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create"]
- apiGroups: ["coordination.k8s.io"]
resourceNames: ["cluster-autoscaler"]
resources: ["leases"]
verbs: ["get", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: cluster-autoscaler
namespace: kube-system
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
rules:
- apiGroups: [""]
resources: ["configmaps"]
verbs: ["create","list","watch"]
- apiGroups: [""]
resources: ["configmaps"]
resourceNames: ["cluster-autoscaler-status", "cluster-autoscaler-priority-expander"]
verbs: ["delete", "get", "update", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: cluster-autoscaler
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-autoscaler
subjects:
- kind: ServiceAccount
name: cluster-autoscaler
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: cluster-autoscaler
namespace: kube-system
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: cluster-autoscaler
subjects:
- kind: ServiceAccount
name: cluster-autoscaler
namespace: kube-system
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: cluster-autoscaler
namespace: kube-system
labels:
app: cluster-autoscaler
spec:
replicas: 1
selector:
matchLabels:
app: cluster-autoscaler
template:
metadata:
labels:
app: cluster-autoscaler
annotations:
prometheus.io/scrape: 'true'
prometheus.io/port: '8085'
spec:
serviceAccountName: cluster-autoscaler
nodeSelector:
node-role.kubernetes.io/control-plane: ""
node.cloudprovider.kubernetes.io/platform: oracle
tolerations:
- key: node-role.kubernetes.io/control-plane
effect: NoSchedule
containers:
      - name: cluster-autoscaler
        image: k8s.gcr.io/autoscaling/cluster-autoscaler:v1.25.0
resources:
limits:
cpu: 100m
memory: 300Mi
requests:
cpu: 100m
memory: 300Mi
command:
- ./cluster-autoscaler
- --v=5
- --logtostderr=true
- --cloud-config=/etc/oci/config.ini
- --cloud-provider=oci
- --node-deletion-delay-timeout=10m0s
- --scan-interval=3m
- --regional
- --balance-similar-node-groups
- --nodes=0:2:ocid1.instancepool.oc1.phx.aaaaaaaaky6u2wqdvvpmedbm5hdczjiauyojquzvaadvlqqx46na2pw2jebq
- --nodes=0:2:ocid1.instancepool.oc1.phx.aaaaaaaa72d7f7nuu5z547kh7uij4elxanzjimo27kjvc4fiobsb467u2usa
- --nodes=0:2:ocid1.instancepool.oc1.phx.aaaaaaaakozlxws34b3yx53cmtzrxjf3vw4oq357xp5njmuka4wij52cfjda
env:
- name: OCI_USE_INSTANCE_PRINCIPAL
value: "true"
- name: OCI_REGION
value: "us-phoenix-1"
volumeMounts:
- name: cloud-config
mountPath: /etc/oci
readOnly: true
volumes:
- name: cloud-config
secret:
secretName: oci-cloud-controller-manager
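The autoscaler reads its OCI credentials from the oci-cloud-controller-manager secret mounted at /etc/oci; the secret itself is shipped as a Talos inline manifest further down, carrying the cloud-provider.yaml and config.ini keys. Rendered, it would look roughly like this sketch (the compartment OCID is a placeholder):

apiVersion: v1
kind: Secret
metadata:
  name: oci-cloud-controller-manager
  namespace: kube-system
stringData:
  # consumed by the OCI cloud-controller-manager
  cloud-provider.yaml: |
    useInstancePrincipals: true
    loadBalancer:
      disabled: true
  # consumed by the autoscaler via --cloud-config=/etc/oci/config.ini
  config.ini: |
    [Global]
    compartment-id = ocid1.compartment.oc1..placeholder
    region = us-phoenix-1
    use-instance-principals = true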

View File

@@ -0,0 +1,51 @@
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: overprovisioning
value: -1
globalDefault: false
description: "Priority class used by overprovisioning."
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: overprovisioning
namespace: default
spec:
replicas: 1
selector:
matchLabels:
run: overprovisioning
template:
metadata:
labels:
run: overprovisioning
spec:
nodeSelector:
node.cloudprovider.kubernetes.io/platform: oracle
tolerations:
- key: DeletionCandidateOfClusterAutoscaler
effect: PreferNoSchedule
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- topologyKey: kubernetes.io/hostname
labelSelector:
matchExpressions:
- key: run
operator: In
values:
- overprovisioning
priorityClassName: overprovisioning
containers:
- name: reserve-resources
image: registry.k8s.io/pause:3.6
resources:
requests:
cpu: "700m"
securityContext:
allowPrivilegeEscalation: false
seccompProfile:
type: RuntimeDefault
capabilities:
drop: ["ALL"]

View File

@@ -24,14 +24,40 @@ resource "oci_identity_policy" "terraform" {
resource "oci_identity_policy" "ccm" {
name = "ccm"
description = "policy created by terraform for ccm"
description = "IAM policy for the Kubernetes CCM, created via Terraform"
compartment_id = oci_identity_compartment.project.id
# https://github.com/oracle/oci-cloud-controller-manager/blob/master/manifests/provider-config-example.yaml
statements = [
"Allow dynamic-group ${oci_identity_dynamic_group.ccm.name} to read instance-family in compartment ${oci_identity_compartment.project.name}",
"Allow dynamic-group ${oci_identity_dynamic_group.ccm.name} to read virtual-network-family in compartment ${oci_identity_compartment.project.name}"
# "Allow dynamic-group ${oci_identity_dynamic_group.ccm.name} to manage load-balancers in compartment ${oci_identity_compartment.project.name}"
"Allow dynamic-group ${oci_identity_dynamic_group.ccm.name} to read virtual-network-family in compartment ${oci_identity_compartment.project.name}",
# "Allow dynamic-group ${oci_identity_dynamic_group.ccm.name} to manage load-balancers in compartment ${oci_identity_compartment.project.name}",
]
}
resource "oci_identity_policy" "csi" {
name = "csi"
description = "IAM policy for the Kubernetes CSI driver, created via Terraform"
compartment_id = oci_identity_compartment.project.id
statements = [
"Allow dynamic-group ${oci_identity_dynamic_group.ccm.name} to manage volumes in compartment ${oci_identity_compartment.project.name}",
"Allow dynamic-group ${oci_identity_dynamic_group.ccm.name} to manage volume-attachments in compartment ${oci_identity_compartment.project.name}",
]
}
resource "oci_identity_policy" "scaler" {
name = "scaler"
description = "IAM policy for the Kubernetes cluster autoscaler, created via Terraform"
compartment_id = oci_identity_compartment.project.id
statements = [
"Allow dynamic-group ${oci_identity_dynamic_group.ccm.name} to manage instance-pools in compartment ${oci_identity_compartment.project.name}",
"Allow dynamic-group ${oci_identity_dynamic_group.ccm.name} to manage instance-configurations in compartment ${oci_identity_compartment.project.name}",
"Allow dynamic-group ${oci_identity_dynamic_group.ccm.name} to manage instance-family in compartment ${oci_identity_compartment.project.name}",
"Allow dynamic-group ${oci_identity_dynamic_group.ccm.name} to use subnets in compartment ${oci_identity_compartment.project.name}",
"Allow dynamic-group ${oci_identity_dynamic_group.ccm.name} to read virtual-network-family in compartment ${oci_identity_compartment.project.name}",
"Allow dynamic-group ${oci_identity_dynamic_group.ccm.name} to use vnics in compartment ${oci_identity_compartment.project.name}",
]
}
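All three policies grant rights to the same oci_identity_dynamic_group.ccm, so the instances authenticate via instance principals (matching OCI_USE_INSTANCE_PRINCIPAL=true in the autoscaler deployment) rather than distributed API keys. The dynamic group itself is defined elsewhere in the repo; a sketch of its likely shape, where var.tenancy_ocid and the matching rule are assumptions:

resource "oci_identity_dynamic_group" "ccm" {
  name           = "ccm"
  description    = "instances acting as CCM/CSI/autoscaler"
  # dynamic groups live at tenancy level; var.tenancy_ocid is an assumption
  compartment_id = var.tenancy_ocid
  matching_rule  = "ALL {instance.compartment.id = '${oci_identity_compartment.project.id}'}"
}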

View File

@@ -11,7 +11,10 @@ resource "oci_core_ipv6" "contolplane" {
}
locals {
contolplane_labels = "topology.kubernetes.io/region=${var.region}"
oci = templatefile("${path.module}/templates/oci.ini", {
compartment_id = var.compartment_ocid
region = var.region
})
}
resource "oci_core_instance" "contolplane" {
@@ -32,12 +35,12 @@ resource "oci_core_instance" "contolplane" {
metadata = {
user_data = base64encode(templatefile("${path.module}/templates/controlplane.yaml",
merge(var.kubernetes, {
name = "contolplane-${count.index + 1}"
name = "${local.project}-contolplane-${count.index + 1}"
lbv4 = local.lbv4
lbv4_local = local.lbv4_local
nodeSubnets = local.network_public[element(local.zones, count.index)].cidr_block
labels = local.contolplane_labels
ccm = base64encode("useInstancePrincipals: true\nloadBalancer:\n disabled: true")
ccm = filebase64("${path.module}/templates/oci-cloud-provider.yaml")
oci = base64encode(local.oci)
})
))
}

View File

@@ -53,7 +53,7 @@ resource "oci_core_instance_configuration" "worker" {
merge(var.kubernetes, {
lbv4 = local.lbv4_local
clusterDns = cidrhost(split(",", var.kubernetes["serviceSubnets"])[0], 10)
nodeSubnets = local.network_public[each.key].cidr_block
nodeSubnets = local.network_private[each.key].cidr_block
labels = local.worker_labels
})
))

View File

@@ -31,14 +31,18 @@ variable "kubernetes" {
default = {
podSubnets = "10.32.0.0/12,fd40:10:32::/102"
serviceSubnets = "10.200.0.0/22,fd40:10:200::/112"
nodeSubnets = "192.168.0.0/16"
domain = "cluster.local"
apiDomain = "api.cluster.local"
clusterName = "talos-k8s-oracle"
clusterID = ""
clusterSecret = ""
tokenMachine = ""
caMachine = ""
token = ""
ca = ""
}
sensitive = true
}
variable "vpc_main_cidr" {

View File

@@ -17,7 +17,6 @@ machine:
kubelet:
extraArgs:
rotate-server-certificates: true
node-labels: ${labels}
clusterDNS:
- 169.254.2.53
- ${cidrhost(split(",",serviceSubnets)[0], 10)}
@@ -120,6 +119,7 @@ cluster:
namespace: kube-system
data:
cloud-provider.yaml: ${ccm}
config.ini: ${oci}
externalCloudProvider:
enabled: true
manifests:

oracle/templates/oci.ini Normal file
View File

@@ -0,0 +1,4 @@
[Global]
compartment-id = ${compartment_id}
region = ${region}
use-instance-principals = true

View File

@@ -36,9 +36,6 @@ machine:
keys:
- nodeID: {}
slot: 0
time:
servers:
- 169.254.169.254
cluster:
id: ${clusterID}
secret: ${clusterSecret}

View File

@@ -36,9 +36,6 @@ machine:
keys:
- nodeID: {}
slot: 0
time:
servers:
- 169.254.169.254
cluster:
id: ${clusterID}
secret: ${clusterSecret}