diff --git a/packages/apps/kubernetes/Chart.yaml b/packages/apps/kubernetes/Chart.yaml index cc8b5a83..05076167 100644 --- a/packages/apps/kubernetes/Chart.yaml +++ b/packages/apps/kubernetes/Chart.yaml @@ -16,7 +16,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.18.1 +version: 0.19.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/packages/apps/kubernetes/README.md b/packages/apps/kubernetes/README.md index bbc55cfd..3067a323 100644 --- a/packages/apps/kubernetes/README.md +++ b/packages/apps/kubernetes/README.md @@ -27,20 +27,46 @@ How to access to deployed cluster: kubectl get secret -n kubernetes--admin-kubeconfig -o go-template='{{ printf "%s\n" (index .data "super-admin.conf" | base64decode) }}' > test ``` -# Series +## Parameters - +### Common parameters -. | U | O | CX | M | RT -----------------------------|-----|-----|------|-----|------ -*Has GPUs* | | | | | -*Hugepages* | | | ✓ | ✓ | ✓ -*Overcommitted Memory* | | ✓ | | | -*Dedicated CPU* | | | ✓ | | ✓ -*Burstable CPU performance* | ✓ | ✓ | | ✓ | -*Isolated emulator threads* | | | ✓ | | ✓ -*vNUMA* | | | ✓ | | ✓ -*vCPU-To-Memory Ratio* | 1:4 | 1:4 | 1:2 | 1:8 | 1:4 +| Name | Description | Value | +| ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | ------------ | +| `host` | The hostname used to access the Kubernetes cluster externally (defaults to using the cluster name as a subdomain for the tenant host). | `""` | +| `controlPlane.replicas` | Number of replicas for Kubernetes control-plane components | `2` | +| `storageClass` | StorageClass used to store user data | `replicated` | +| `nodeGroups` | nodeGroups configuration | `{}` | + +### Cluster Addons + +| Name | Description | Value | +| --------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| `addons.certManager.enabled` | Enables the cert-manager | `false` | +| `addons.certManager.valuesOverride` | Custom values to override | `{}` | +| `addons.ingressNginx.enabled` | Enable Ingress-NGINX controller (expect nodes with 'ingress-nginx' role) | `false` | +| `addons.ingressNginx.valuesOverride` | Custom values to override | `{}` | +| `addons.ingressNginx.hosts` | List of domain names that should be passed through to the cluster by upper cluster | `[]` | +| `addons.gpuOperator.enabled` | Enables the gpu-operator | `false` | +| `addons.gpuOperator.valuesOverride` | Custom values to override | `{}` | +| `addons.fluxcd.enabled` | Enables Flux CD | `false` | +| `addons.fluxcd.valuesOverride` | Custom values to override | `{}` | +| `addons.monitoringAgents.enabled` | Enables MonitoringAgents (fluentbit, vmagents for sending logs and metrics to storage) if tenant monitoring enabled, send to tenant storage, else to root storage | `false` | +| `addons.monitoringAgents.valuesOverride` | Custom values to override | `{}` | +| `addons.verticalPodAutoscaler.valuesOverride` | Custom values to override | `{}` | + +### Kamaji control plane + +| Name | Description | Value | +| --------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| `kamajiControlPlane.apiServer.resources` | Resources | `{}` | +| `kamajiControlPlane.apiServer.resourcesPreset` | Set container resources according to one common preset (allowed values: none, nano, micro, small, medium, large, xlarge, 2xlarge). This is ignored if resources is set (resources is recommended for production). | `small` | +| `kamajiControlPlane.controllerManager.resources` | Resources | `{}` | +| `kamajiControlPlane.controllerManager.resourcesPreset` | Set container resources according to one common preset (allowed values: none, nano, micro, small, medium, large, xlarge, 2xlarge). This is ignored if resources is set (resources is recommended for production). | `micro` | +| `kamajiControlPlane.scheduler.resources` | Resources | `{}` | +| `kamajiControlPlane.scheduler.resourcesPreset` | Set container resources according to one common preset (allowed values: none, nano, micro, small, medium, large, xlarge, 2xlarge). This is ignored if resources is set (resources is recommended for production). | `micro` | +| `kamajiControlPlane.addons.konnectivity.server.resources` | Resources | `{}` | +| `kamajiControlPlane.addons.konnectivity.server.resourcesPreset` | Set container resources according to one common preset (allowed values: none, nano, micro, small, medium, large, xlarge, 2xlarge). This is ignored if resources is set (resources is recommended for production). | `micro` | ## U Series diff --git a/packages/apps/kubernetes/templates/cluster.yaml b/packages/apps/kubernetes/templates/cluster.yaml index a208bc6e..385adacc 100644 --- a/packages/apps/kubernetes/templates/cluster.yaml +++ b/packages/apps/kubernetes/templates/cluster.yaml @@ -39,6 +39,13 @@ spec: sockets: 1 {{- end }} devices: + {{- if .group.gpus }} + gpus: + {{- range $i, $gpu := .group.gpus }} + - name: gpu{{ add $i 1 }} + deviceName: {{ $gpu.name }} + {{- end }} + {{- end }} disks: - name: system disk: diff --git a/packages/apps/kubernetes/templates/helmreleases/cert-manager-crds.yaml b/packages/apps/kubernetes/templates/helmreleases/cert-manager-crds.yaml index 8ee5dc82..04080631 100644 --- a/packages/apps/kubernetes/templates/helmreleases/cert-manager-crds.yaml +++ b/packages/apps/kubernetes/templates/helmreleases/cert-manager-crds.yaml @@ -4,7 +4,7 @@ metadata: name: {{ .Release.Name }}-cert-manager-crds labels: cozystack.io/repository: system - coztstack.io/target-cluster-name: {{ .Release.Name }} + cozystack.io/target-cluster-name: {{ .Release.Name }} spec: interval: 5m releaseName: cert-manager-crds diff --git a/packages/apps/kubernetes/templates/helmreleases/cert-manager.yaml b/packages/apps/kubernetes/templates/helmreleases/cert-manager.yaml index 8a7213a9..3d8bbaa0 100644 --- a/packages/apps/kubernetes/templates/helmreleases/cert-manager.yaml +++ b/packages/apps/kubernetes/templates/helmreleases/cert-manager.yaml @@ -5,7 +5,7 @@ metadata: name: {{ .Release.Name }}-cert-manager labels: cozystack.io/repository: system - coztstack.io/target-cluster-name: {{ .Release.Name }} + cozystack.io/target-cluster-name: {{ .Release.Name }} spec: interval: 5m releaseName: cert-manager diff --git a/packages/apps/kubernetes/templates/helmreleases/cilium.yaml b/packages/apps/kubernetes/templates/helmreleases/cilium.yaml index cf6bbe12..4edc3c24 100644 --- a/packages/apps/kubernetes/templates/helmreleases/cilium.yaml +++ b/packages/apps/kubernetes/templates/helmreleases/cilium.yaml @@ -4,7 +4,7 @@ metadata: name: {{ .Release.Name }}-cilium labels: cozystack.io/repository: system - coztstack.io/target-cluster-name: {{ .Release.Name }} + cozystack.io/target-cluster-name: {{ .Release.Name }} spec: interval: 5m releaseName: cilium diff --git a/packages/apps/kubernetes/templates/helmreleases/csi.yaml b/packages/apps/kubernetes/templates/helmreleases/csi.yaml index 2fe33509..ec6092f0 100644 --- a/packages/apps/kubernetes/templates/helmreleases/csi.yaml +++ b/packages/apps/kubernetes/templates/helmreleases/csi.yaml @@ -4,7 +4,7 @@ metadata: name: {{ .Release.Name }}-csi labels: cozystack.io/repository: system - coztstack.io/target-cluster-name: {{ .Release.Name }} + cozystack.io/target-cluster-name: {{ .Release.Name }} spec: interval: 5m releaseName: csi diff --git a/packages/apps/kubernetes/templates/helmreleases/delete.yaml b/packages/apps/kubernetes/templates/helmreleases/delete.yaml index 35cacedb..10a8d995 100644 --- a/packages/apps/kubernetes/templates/helmreleases/delete.yaml +++ b/packages/apps/kubernetes/templates/helmreleases/delete.yaml @@ -38,6 +38,7 @@ spec: {{ .Release.Name }}-ingress-nginx {{ .Release.Name }}-fluxcd-operator {{ .Release.Name }}-fluxcd + {{ .Release.Name }}-gpu-operator -p '{"spec": {"suspend": true}}' --type=merge --field-manager=flux-client-side-apply || true --- @@ -76,6 +77,7 @@ rules: - {{ .Release.Name }}-ingress-nginx - {{ .Release.Name }}-fluxcd-operator - {{ .Release.Name }}-fluxcd + - {{ .Release.Name }}-gpu-operator --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding diff --git a/packages/apps/kubernetes/templates/helmreleases/fluxcd.yaml b/packages/apps/kubernetes/templates/helmreleases/fluxcd.yaml index c11629b9..1a4f982a 100644 --- a/packages/apps/kubernetes/templates/helmreleases/fluxcd.yaml +++ b/packages/apps/kubernetes/templates/helmreleases/fluxcd.yaml @@ -5,7 +5,7 @@ metadata: name: {{ .Release.Name }}-fluxcd-operator labels: cozystack.io/repository: system - coztstack.io/target-cluster-name: {{ .Release.Name }} + cozystack.io/target-cluster-name: {{ .Release.Name }} spec: interval: 5m releaseName: fluxcd-operator @@ -49,7 +49,7 @@ metadata: name: {{ .Release.Name }}-fluxcd labels: cozystack.io/repository: system - coztstack.io/target-cluster-name: {{ .Release.Name }} + cozystack.io/target-cluster-name: {{ .Release.Name }} spec: interval: 5m releaseName: fluxcd diff --git a/packages/apps/kubernetes/templates/helmreleases/gpu-operator.yaml b/packages/apps/kubernetes/templates/helmreleases/gpu-operator.yaml new file mode 100644 index 00000000..9a003e12 --- /dev/null +++ b/packages/apps/kubernetes/templates/helmreleases/gpu-operator.yaml @@ -0,0 +1,57 @@ +{{- if .Values.addons.gpuOperator.enabled }} +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: {{ .Release.Name }}-gpu-operator + labels: + cozystack.io/repository: system + cozystack.io/target-cluster-name: {{ .Release.Name }} +spec: + interval: 5m + releaseName: gpu-operator + chart: + spec: + chart: cozy-gpu-operator + reconcileStrategy: Revision + sourceRef: + kind: HelmRepository + name: cozystack-system + namespace: cozy-system + kubeConfig: + secretRef: + name: {{ .Release.Name }}-admin-kubeconfig + key: super-admin.svc + targetNamespace: cozy-gpu-operator + storageNamespace: cozy-gpu-operator + install: + createNamespace: true + remediation: + retries: -1 + upgrade: + remediation: + retries: -1 + {{- if .Values.addons.gpuOperator.valuesOverride }} + valuesFrom: + - kind: Secret + name: {{ .Release.Name }}-gpu-operator-values-override + valuesKey: values + {{- end }} + + dependsOn: + {{- if lookup "helm.toolkit.fluxcd.io/v2" "HelmRelease" .Release.Namespace .Release.Name }} + - name: {{ .Release.Name }} + namespace: {{ .Release.Namespace }} + {{- end }} + - name: {{ .Release.Name }}-cilium + namespace: {{ .Release.Namespace }} +{{- end }} +{{- if .Values.addons.gpuOperator.valuesOverride }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Release.Name }}-gpu-operator-values-override +stringData: + values: | + {{- toYaml .Values.addons.gpuOperator.valuesOverride | nindent 4 }} +{{- end }} diff --git a/packages/apps/kubernetes/templates/helmreleases/ingress-nginx.yaml b/packages/apps/kubernetes/templates/helmreleases/ingress-nginx.yaml index 8daaf01f..bedb876a 100644 --- a/packages/apps/kubernetes/templates/helmreleases/ingress-nginx.yaml +++ b/packages/apps/kubernetes/templates/helmreleases/ingress-nginx.yaml @@ -5,7 +5,7 @@ metadata: name: {{ .Release.Name }}-ingress-nginx labels: cozystack.io/repository: system - coztstack.io/target-cluster-name: {{ .Release.Name }} + cozystack.io/target-cluster-name: {{ .Release.Name }} spec: interval: 5m releaseName: ingress-nginx diff --git a/packages/apps/kubernetes/templates/helmreleases/monitoring-agents.yaml b/packages/apps/kubernetes/templates/helmreleases/monitoring-agents.yaml index eb19f23b..dac132fc 100644 --- a/packages/apps/kubernetes/templates/helmreleases/monitoring-agents.yaml +++ b/packages/apps/kubernetes/templates/helmreleases/monitoring-agents.yaml @@ -7,7 +7,7 @@ metadata: name: {{ .Release.Name }}-monitoring-agents labels: cozystack.io/repository: system - coztstack.io/target-cluster-name: {{ .Release.Name }} + cozystack.io/target-cluster-name: {{ .Release.Name }} spec: interval: 5m releaseName: cozy-monitoring-agents diff --git a/packages/apps/kubernetes/templates/helmreleases/vertical-pod-autoscaler-crds.yaml b/packages/apps/kubernetes/templates/helmreleases/vertical-pod-autoscaler-crds.yaml index 89634565..9c901457 100644 --- a/packages/apps/kubernetes/templates/helmreleases/vertical-pod-autoscaler-crds.yaml +++ b/packages/apps/kubernetes/templates/helmreleases/vertical-pod-autoscaler-crds.yaml @@ -5,7 +5,7 @@ metadata: name: {{ .Release.Name }}-vertical-pod-autoscaler-crds labels: cozystack.io/repository: system - coztstack.io/target-cluster-name: {{ .Release.Name }} + cozystack.io/target-cluster-name: {{ .Release.Name }} spec: interval: 5m releaseName: vertical-pod-autoscaler-crds diff --git a/packages/apps/kubernetes/templates/helmreleases/vertical-pod-autoscaler.yaml b/packages/apps/kubernetes/templates/helmreleases/vertical-pod-autoscaler.yaml index d5e17079..fff634e3 100644 --- a/packages/apps/kubernetes/templates/helmreleases/vertical-pod-autoscaler.yaml +++ b/packages/apps/kubernetes/templates/helmreleases/vertical-pod-autoscaler.yaml @@ -7,7 +7,7 @@ metadata: name: {{ .Release.Name }}-vertical-pod-autoscaler labels: cozystack.io/repository: system - coztstack.io/target-cluster-name: {{ .Release.Name }} + cozystack.io/target-cluster-name: {{ .Release.Name }} spec: interval: 5m releaseName: vertical-pod-autoscaler diff --git a/packages/apps/kubernetes/templates/helmreleases/victoria-metrics-operator.yaml b/packages/apps/kubernetes/templates/helmreleases/victoria-metrics-operator.yaml index de19c968..e47ea4f3 100644 --- a/packages/apps/kubernetes/templates/helmreleases/victoria-metrics-operator.yaml +++ b/packages/apps/kubernetes/templates/helmreleases/victoria-metrics-operator.yaml @@ -5,7 +5,7 @@ metadata: name: {{ .Release.Name }}-cozy-victoria-metrics-operator labels: cozystack.io/repository: system - coztstack.io/target-cluster-name: {{ .Release.Name }} + cozystack.io/target-cluster-name: {{ .Release.Name }} spec: interval: 5m releaseName: cozy-victoria-metrics-operator diff --git a/packages/apps/kubernetes/values.schema.json b/packages/apps/kubernetes/values.schema.json index ed0de279..58e63f1e 100644 --- a/packages/apps/kubernetes/values.schema.json +++ b/packages/apps/kubernetes/values.schema.json @@ -12,7 +12,7 @@ "properties": { "replicas": { "type": "number", - "description": "Number of replicas for Kubernetes contorl-plane components", + "description": "Number of replicas for Kubernetes control-plane components", "default": 2 } } @@ -61,6 +61,21 @@ } } }, + "gpuOperator": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enables the gpu-operator", + "default": false + }, + "valuesOverride": { + "type": "object", + "description": "Custom values to override", + "default": {} + } + } + }, "fluxcd": { "type": "object", "properties": { @@ -90,8 +105,93 @@ "default": {} } } + }, + "verticalPodAutoscaler": { + "type": "object", + "properties": { + "valuesOverride": { + "type": "object", + "description": "Custom values to override", + "default": {} + } + } + } + } + }, + "kamajiControlPlane": { + "type": "object", + "properties": { + "apiServer": { + "type": "object", + "properties": { + "resources": { + "type": "object", + "description": "Resources", + "default": {} + }, + "resourcesPreset": { + "type": "string", + "description": "Set container resources according to one common preset (allowed values: none, nano, micro, small, medium, large, xlarge, 2xlarge). This is ignored if resources is set (resources is recommended for production).", + "default": "small" + } + } + }, + "controllerManager": { + "type": "object", + "properties": { + "resources": { + "type": "object", + "description": "Resources", + "default": {} + }, + "resourcesPreset": { + "type": "string", + "description": "Set container resources according to one common preset (allowed values: none, nano, micro, small, medium, large, xlarge, 2xlarge). This is ignored if resources is set (resources is recommended for production).", + "default": "micro" + } + } + }, + "scheduler": { + "type": "object", + "properties": { + "resources": { + "type": "object", + "description": "Resources", + "default": {} + }, + "resourcesPreset": { + "type": "string", + "description": "Set container resources according to one common preset (allowed values: none, nano, micro, small, medium, large, xlarge, 2xlarge). This is ignored if resources is set (resources is recommended for production).", + "default": "micro" + } + } + }, + "addons": { + "type": "object", + "properties": { + "konnectivity": { + "type": "object", + "properties": { + "server": { + "type": "object", + "properties": { + "resources": { + "type": "object", + "description": "Resources", + "default": {} + }, + "resourcesPreset": { + "type": "string", + "description": "Set container resources according to one common preset (allowed values: none, nano, micro, small, medium, large, xlarge, 2xlarge). This is ignored if resources is set (resources is recommended for production).", + "default": "micro" + } + } + } + } + } + } } } } } -} +} \ No newline at end of file diff --git a/packages/apps/kubernetes/values.yaml b/packages/apps/kubernetes/values.yaml index 9fb60221..f18e43d2 100644 --- a/packages/apps/kubernetes/values.yaml +++ b/packages/apps/kubernetes/values.yaml @@ -1,7 +1,7 @@ ## @section Common parameters ## @param host The hostname used to access the Kubernetes cluster externally (defaults to using the cluster name as a subdomain for the tenant host). -## @param controlPlane.replicas Number of replicas for Kubernetes contorl-plane components +## @param controlPlane.replicas Number of replicas for Kubernetes control-plane components ## @param storageClass StorageClass used to store user data ## host: "" @@ -24,6 +24,14 @@ nodeGroups: cpu: "" memory: "" + ## List of GPUs to attach (WARN: NVIDIA driver requires at least 4 GiB of RAM) + ## e.g: + ## instanceType: "u1.xlarge" + ## gpus: + ## - name: nvidia.com/AD102GL_L40S + gpus: [] + + ## @section Cluster Addons ## addons: @@ -52,6 +60,14 @@ addons: hosts: [] valuesOverride: {} + ## GPU-operator: NVIDIA GPU Operator + ## + gpuOperator: + ## @param addons.gpuOperator.enabled Enables the gpu-operator + ## @param addons.gpuOperator.valuesOverride Custom values to override + enabled: false + valuesOverride: {} + ## Flux CD ## fluxcd: @@ -135,4 +151,4 @@ kamajiControlPlane: ## @param kamajiControlPlane.addons.konnectivity.server.resourcesPreset Set container resources according to one common preset (allowed values: none, nano, micro, small, medium, large, xlarge, 2xlarge). This is ignored if resources is set (resources is recommended for production). resourcesPreset: "micro" - \ No newline at end of file + diff --git a/packages/apps/versions_map b/packages/apps/versions_map index a1175417..8d0f9b60 100644 --- a/packages/apps/versions_map +++ b/packages/apps/versions_map @@ -59,7 +59,7 @@ kubernetes 0.16.0 077045b0 kubernetes 0.17.0 1fbbfcd0 kubernetes 0.17.1 fd240701 kubernetes 0.18.0 721c12a7 -kubernetes 0.18.1 HEAD +kubernetes 0.19.0 HEAD mysql 0.1.0 263e47be mysql 0.2.0 c24a103f mysql 0.3.0 53f2365e