From cac81f3cceb8b454e1fe31e33313a308f69537f8 Mon Sep 17 00:00:00 2001
From: Vegard Hagen
Date: Sat, 11 Nov 2023 10:21:26 +0100
Subject: [PATCH] feat(gpu): Change from nvidia-gpu-operator to nvidia-device-plugin

The simpler nvidia-device-plugin chart covers my needs and is easier to get working.
---
 .../cm-device-plugin-time-slicing.yaml          | 15 ---------------
 infra/gpu-operator/kustomization.yaml           | 15 ---------------
 infra/gpu-operator/values.yaml                  | 13 -------------
 infra/nvidia-device-plugin/cm-time-slicing.yaml | 12 ++++++++++++
 infra/nvidia-device-plugin/kustomization.yaml   | 16 ++++++++++++++++
 .../namespace.yaml                              |  2 +-
 infra/nvidia-device-plugin/values.yaml          |  3 +++
 infra/project.yaml                              |  4 +++-
 8 files changed, 35 insertions(+), 45 deletions(-)
 delete mode 100644 infra/gpu-operator/cm-device-plugin-time-slicing.yaml
 delete mode 100644 infra/gpu-operator/kustomization.yaml
 delete mode 100644 infra/gpu-operator/values.yaml
 create mode 100644 infra/nvidia-device-plugin/cm-time-slicing.yaml
 create mode 100644 infra/nvidia-device-plugin/kustomization.yaml
 rename infra/{gpu-operator => nvidia-device-plugin}/namespace.yaml (58%)
 create mode 100644 infra/nvidia-device-plugin/values.yaml

diff --git a/infra/gpu-operator/cm-device-plugin-time-slicing.yaml b/infra/gpu-operator/cm-device-plugin-time-slicing.yaml
deleted file mode 100644
index e507227..0000000
--- a/infra/gpu-operator/cm-device-plugin-time-slicing.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: device-plugin-time-slicing
-  namespace: gpu-operator
-data:
-  any: |-
-    version: v1
-    flags:
-      migStrategy: none
-    sharing:
-      timeSlicing:
-        resources:
-          - name: nvidia.com/gpu
-            replicas: 10
\ No newline at end of file
diff --git a/infra/gpu-operator/kustomization.yaml b/infra/gpu-operator/kustomization.yaml
deleted file mode 100644
index 6dfea15..0000000
--- a/infra/gpu-operator/kustomization.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-namespace: gpu-operator
-
-resources:
-  - namespace.yaml
-  - cm-device-plugin-time-slicing.yaml
-
-helmCharts:
-  - name: gpu-operator
-    repo: https://helm.ngc.nvidia.com/nvidia
-    version: 23.9.0
-    releaseName: "gpu-operator"
-    includeCRDs: true
-    valuesFile: values.yaml
\ No newline at end of file
diff --git a/infra/gpu-operator/values.yaml b/infra/gpu-operator/values.yaml
deleted file mode 100644
index 74158df..0000000
--- a/infra/gpu-operator/values.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-driver:
-  enabled: false
-toolkit:
-  enabled: false
-
-devicePlugin:
-  config:
-    name: device-plugin-time-slicing
-    default: any
-
-#cdi:
-#  enabled: true
-#  default: true
diff --git a/infra/nvidia-device-plugin/cm-time-slicing.yaml b/infra/nvidia-device-plugin/cm-time-slicing.yaml
new file mode 100644
index 0000000..4d98668
--- /dev/null
+++ b/infra/nvidia-device-plugin/cm-time-slicing.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: time-slicing
+data:
+  default: |-
+    version: v1
+    sharing:
+      timeSlicing:
+        resources:
+          - name: nvidia.com/gpu
+            replicas: 10
diff --git a/infra/nvidia-device-plugin/kustomization.yaml b/infra/nvidia-device-plugin/kustomization.yaml
new file mode 100644
index 0000000..e36fefb
--- /dev/null
+++ b/infra/nvidia-device-plugin/kustomization.yaml
@@ -0,0 +1,16 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+namespace: nvidia-device-plugin
+
+resources:
+  - namespace.yaml
+  - cm-time-slicing.yaml
+
+helmCharts:
+  - name: nvidia-device-plugin
+    repo: https://nvidia.github.io/k8s-device-plugin
+    version: 0.14.2
+    releaseName: "nvidia-device-plugin"
+    namespace: nvidia-device-plugin
+    includeCRDs: true
+    valuesFile: values.yaml
diff --git a/infra/gpu-operator/namespace.yaml b/infra/nvidia-device-plugin/namespace.yaml
similarity index 58%
rename from infra/gpu-operator/namespace.yaml
rename to infra/nvidia-device-plugin/namespace.yaml
index 4c6b858..4a6b3ab 100644
--- a/infra/gpu-operator/namespace.yaml
+++ b/infra/nvidia-device-plugin/namespace.yaml
@@ -1,4 +1,4 @@
 apiVersion: v1
 kind: Namespace
 metadata:
-  name: gpu-operator
\ No newline at end of file
+  name: nvidia-device-plugin
diff --git a/infra/nvidia-device-plugin/values.yaml b/infra/nvidia-device-plugin/values.yaml
new file mode 100644
index 0000000..f535040
--- /dev/null
+++ b/infra/nvidia-device-plugin/values.yaml
@@ -0,0 +1,3 @@
+config:
+  name: time-slicing
+  default: default
\ No newline at end of file
diff --git a/infra/project.yaml b/infra/project.yaml
index 9f6b3da..a2f774b 100644
--- a/infra/project.yaml
+++ b/infra/project.yaml
@@ -24,6 +24,8 @@ spec:
       server: '*'
     - namespace: 'monitoring'
       server: '*'
+    - namespace: 'nvidia-device-plugin'
+      server: '*'
     - namespace: 'pi-hole'
       server: '*'
     - namespace: 'net-aux'
@@ -34,4 +36,4 @@ spec:
       server: '*'
   clusterResourceWhitelist:
     - group: '*'
-      kind: '*'
\ No newline at end of file
+      kind: '*'