feat(gpu): Change from nvidia-gpu-operator to nvidia-device-plugin

The simpler nvidia-device-plugin chart covers my needs and is easier to
get working.
This commit is contained in:
Vegard Hagen
2023-11-11 10:21:26 +01:00
parent 8667f835b3
commit cac81f3cce
8 changed files with 35 additions and 45 deletions

View File

@@ -1,15 +0,0 @@
# ConfigMap "device-plugin-time-slicing" in the gpu-operator namespace.
# Its single data key, "any", holds an embedded device-plugin config
# document as a literal block scalar ("|-" strips the trailing newline).
apiVersion: v1
kind: ConfigMap
metadata:
  name: device-plugin-time-slicing
  namespace: gpu-operator
data:
  any: |-
    version: v1
    flags:
      migStrategy: none
    sharing:
      timeSlicing:
        resources:
          # NOTE(review): per NVIDIA's time-slicing docs, "replicas" is the
          # number of shared slots advertised per GPU - confirm.
          - name: nvidia.com/gpu
            replicas: 10

View File

@@ -1,15 +0,0 @@
# Kustomization for the gpu-operator deployment.
# Applies the gpu-operator namespace to its resources, includes the
# Namespace and time-slicing ConfigMap manifests, and references the
# gpu-operator Helm chart with values taken from values.yaml.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: gpu-operator
resources:
  - namespace.yaml
  - cm-device-plugin-time-slicing.yaml
helmCharts:
  - name: gpu-operator
    repo: https://helm.ngc.nvidia.com/nvidia
    version: 23.9.0
    releaseName: "gpu-operator"
    # Include the chart's CRDs in the generated output.
    includeCRDs: true
    valuesFile: values.yaml

View File

@@ -1,13 +0,0 @@
# Helm values for the gpu-operator chart.
# The driver and toolkit components are switched off.
driver:
  enabled: false
toolkit:
  enabled: false
# Point the device plugin at the "device-plugin-time-slicing" ConfigMap
# and select its "any" key as the default config.
devicePlugin:
  config:
    name: device-plugin-time-slicing
    default: any
# Disabled CDI settings, kept for reference.
# cdi:
#   enabled: true
#   default: true

View File

@@ -0,0 +1,12 @@
# ConfigMap "time-slicing". No namespace is set here; the Kustomization
# that lists cm-time-slicing.yaml sets namespace nvidia-device-plugin.
# The "default" key holds an embedded device-plugin config document as a
# literal block scalar ("|-" strips the trailing newline).
apiVersion: v1
kind: ConfigMap
metadata:
  name: time-slicing
data:
  default: |-
    version: v1
    sharing:
      timeSlicing:
        resources:
          # NOTE(review): per NVIDIA's time-slicing docs, "replicas" is the
          # number of shared slots advertised per GPU - confirm.
          - name: nvidia.com/gpu
            replicas: 10

View File

@@ -0,0 +1,16 @@
# Kustomization for the nvidia-device-plugin deployment.
# Applies the nvidia-device-plugin namespace to its resources, includes
# the Namespace and time-slicing ConfigMap manifests, and references the
# nvidia-device-plugin Helm chart with values taken from values.yaml.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: nvidia-device-plugin
resources:
  - namespace.yaml
  - cm-time-slicing.yaml
helmCharts:
  - name: nvidia-device-plugin
    repo: https://nvidia.github.io/k8s-device-plugin
    version: 0.14.2
    releaseName: "nvidia-device-plugin"
    # Namespace for the chart itself, set separately from the
    # top-level namespace above.
    namespace: nvidia-device-plugin
    # Include the chart's CRDs in the generated output.
    includeCRDs: true
    valuesFile: values.yaml

View File

@@ -1,4 +1,4 @@
# Namespace for the NVIDIA device plugin resources.
# Renamed in this commit; the previous name was "gpu-operator".
apiVersion: v1
kind: Namespace
metadata:
  name: nvidia-device-plugin

View File

@@ -0,0 +1,3 @@
# Helm values for the nvidia-device-plugin chart.
# "name" matches the "time-slicing" ConfigMap and "default" matches its
# "default" data key.
config:
  name: time-slicing
  default: default

View File

@@ -24,6 +24,8 @@ spec:
server: '*'
- namespace: 'monitoring'
server: '*'
- namespace: 'nvidia-device-plugin'
server: '*'
- namespace: 'pi-hole'
server: '*'
- namespace: 'net-aux'
@@ -34,4 +36,4 @@ spec:
server: '*'
clusterResourceWhitelist:
- group: '*'
kind: '*'
kind: '*'