mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-03 19:58:17 +00:00 
			
		
		
		
	Extend test/e2e/scheduling/nvidia-gpus.go to include a device plugin based nvidia gpu e2e test.
This commit is contained in:
		@@ -42,8 +42,14 @@ const (
 | 
			
		||||
	cosOSImage        = "Container-Optimized OS from Google"
 | 
			
		||||
	// Nvidia driver installation can take upwards of 5 minutes.
 | 
			
		||||
	driverInstallTimeout = 10 * time.Minute
 | 
			
		||||
	// Nvidia COS driver installer daemonset.
 | 
			
		||||
	cosNvidiaDriverInstallerUrl = "https://raw.githubusercontent.com/ContainerEngine/accelerators/stable/cos-nvidia-gpu-installer/daemonset.yaml"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type podCreationFuncType func() *v1.Pod
 | 
			
		||||
 | 
			
		||||
var (
 | 
			
		||||
	gpuResourceName v1.ResourceName
 | 
			
		||||
	dsYamlUrl       string
 | 
			
		||||
	podCreationFunc podCreationFuncType
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func makeCudaAdditionTestPod() *v1.Pod {
 | 
			
		||||
@@ -60,7 +66,7 @@ func makeCudaAdditionTestPod() *v1.Pod {
 | 
			
		||||
					Image: imageutils.GetE2EImage(imageutils.CudaVectorAdd),
 | 
			
		||||
					Resources: v1.ResourceRequirements{
 | 
			
		||||
						Limits: v1.ResourceList{
 | 
			
		||||
							v1.ResourceNvidiaGPU: *resource.NewQuantity(1, resource.DecimalSI),
 | 
			
		||||
							gpuResourceName: *resource.NewQuantity(1, resource.DecimalSI),
 | 
			
		||||
						},
 | 
			
		||||
					},
 | 
			
		||||
					VolumeMounts: []v1.VolumeMount{
 | 
			
		||||
@@ -86,6 +92,30 @@ func makeCudaAdditionTestPod() *v1.Pod {
 | 
			
		||||
	return testPod
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func makeCudaAdditionDevicePluginTestPod() *v1.Pod {
 | 
			
		||||
	podName := testPodNamePrefix + string(uuid.NewUUID())
 | 
			
		||||
	testPod := &v1.Pod{
 | 
			
		||||
		ObjectMeta: metav1.ObjectMeta{
 | 
			
		||||
			Name: podName,
 | 
			
		||||
		},
 | 
			
		||||
		Spec: v1.PodSpec{
 | 
			
		||||
			RestartPolicy: v1.RestartPolicyNever,
 | 
			
		||||
			Containers: []v1.Container{
 | 
			
		||||
				{
 | 
			
		||||
					Name:  "vector-addition",
 | 
			
		||||
					Image: imageutils.GetE2EImage(imageutils.CudaVectorAdd),
 | 
			
		||||
					Resources: v1.ResourceRequirements{
 | 
			
		||||
						Limits: v1.ResourceList{
 | 
			
		||||
							gpuResourceName: *resource.NewQuantity(1, resource.DecimalSI),
 | 
			
		||||
						},
 | 
			
		||||
					},
 | 
			
		||||
				},
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
	}
 | 
			
		||||
	return testPod
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func isClusterRunningCOS(f *framework.Framework) bool {
 | 
			
		||||
	nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
 | 
			
		||||
	framework.ExpectNoError(err, "getting node list")
 | 
			
		||||
@@ -105,7 +135,8 @@ func areGPUsAvailableOnAllSchedulableNodes(f *framework.Framework) bool {
 | 
			
		||||
		if node.Spec.Unschedulable {
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		if node.Status.Capacity.NvidiaGPU().Value() == 0 {
 | 
			
		||||
		framework.Logf("gpuResourceName %s", gpuResourceName)
 | 
			
		||||
		if val, ok := node.Status.Capacity[gpuResourceName]; !ok || val.Value() == 0 {
 | 
			
		||||
			framework.Logf("Nvidia GPUs not available on Node: %q", node.Name)
 | 
			
		||||
			return false
 | 
			
		||||
		}
 | 
			
		||||
@@ -119,7 +150,9 @@ func getGPUsAvailable(f *framework.Framework) int64 {
 | 
			
		||||
	framework.ExpectNoError(err, "getting node list")
 | 
			
		||||
	var gpusAvailable int64
 | 
			
		||||
	for _, node := range nodeList.Items {
 | 
			
		||||
		gpusAvailable += node.Status.Capacity.NvidiaGPU().Value()
 | 
			
		||||
		if val, ok := node.Status.Capacity[gpuResourceName]; ok {
 | 
			
		||||
			gpusAvailable += (&val).Value()
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return gpusAvailable
 | 
			
		||||
}
 | 
			
		||||
@@ -133,10 +166,21 @@ func testNvidiaGPUsOnCOS(f *framework.Framework) {
 | 
			
		||||
		Skip("Nvidia GPU tests are supproted only on Container Optimized OS image currently")
 | 
			
		||||
	}
 | 
			
		||||
	framework.Logf("Cluster is running on COS. Proceeding with test")
 | 
			
		||||
 | 
			
		||||
	if f.BaseName == "device-plugin-gpus" {
 | 
			
		||||
		dsYamlUrl = "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/device-plugin-daemonset.yaml"
 | 
			
		||||
		gpuResourceName = "nvidia.com/gpu"
 | 
			
		||||
		podCreationFunc = makeCudaAdditionDevicePluginTestPod
 | 
			
		||||
	} else {
 | 
			
		||||
		dsYamlUrl = "https://raw.githubusercontent.com/ContainerEngine/accelerators/master/cos-nvidia-gpu-installer/daemonset.yaml"
 | 
			
		||||
		gpuResourceName = v1.ResourceNvidiaGPU
 | 
			
		||||
		podCreationFunc = makeCudaAdditionTestPod
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// GPU drivers might have already been installed.
 | 
			
		||||
	if !areGPUsAvailableOnAllSchedulableNodes(f) {
 | 
			
		||||
		// Install Nvidia Drivers.
 | 
			
		||||
		ds := dsFromManifest(cosNvidiaDriverInstallerUrl)
 | 
			
		||||
		ds := dsFromManifest(dsYamlUrl)
 | 
			
		||||
		ds.Namespace = f.Namespace.Name
 | 
			
		||||
		_, err := f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Create(ds)
 | 
			
		||||
		framework.ExpectNoError(err, "failed to create daemonset")
 | 
			
		||||
@@ -149,7 +193,7 @@ func testNvidiaGPUsOnCOS(f *framework.Framework) {
 | 
			
		||||
	framework.Logf("Creating as many pods as there are Nvidia GPUs and have the pods run a CUDA app")
 | 
			
		||||
	podList := []*v1.Pod{}
 | 
			
		||||
	for i := int64(0); i < getGPUsAvailable(f); i++ {
 | 
			
		||||
		podList = append(podList, f.PodClient().Create(makeCudaAdditionTestPod()))
 | 
			
		||||
		podList = append(podList, f.PodClient().Create(podCreationFunc()))
 | 
			
		||||
	}
 | 
			
		||||
	framework.Logf("Wait for all test pods to succeed")
 | 
			
		||||
	// Wait for all pods to succeed
 | 
			
		||||
@@ -192,3 +236,10 @@ var _ = SIGDescribe("[Feature:GPU]", func() {
 | 
			
		||||
		testNvidiaGPUsOnCOS(f)
 | 
			
		||||
	})
 | 
			
		||||
})
 | 
			
		||||
 | 
			
		||||
var _ = SIGDescribe("[Feature:GPUDevicePlugin]", func() {
 | 
			
		||||
	f := framework.NewDefaultFramework("device-plugin-gpus")
 | 
			
		||||
	It("run Nvidia GPU Device Plugin tests on Container Optimized OS only", func() {
 | 
			
		||||
		testNvidiaGPUsOnCOS(f)
 | 
			
		||||
	})
 | 
			
		||||
})
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user