Remove remnants of broken stuff - nvidia/autoscaling
Signed-off-by: Davanum Srinivas <davanum@gmail.com>
		| @@ -1,57 +0,0 @@ | ||||
| apiVersion: apps/v1 | ||||
| kind: DaemonSet | ||||
| metadata: | ||||
|   name: nvidia-gpu-device-plugin | ||||
|   namespace: kube-system | ||||
|   labels: | ||||
|     k8s-app: nvidia-gpu-device-plugin | ||||
|     addonmanager.kubernetes.io/mode: EnsureExists | ||||
| spec: | ||||
|   selector: | ||||
|     matchLabels: | ||||
|       k8s-app: nvidia-gpu-device-plugin | ||||
|   template: | ||||
|     metadata: | ||||
|       labels: | ||||
|         k8s-app: nvidia-gpu-device-plugin | ||||
|     spec: | ||||
|       priorityClassName: system-node-critical | ||||
|       affinity: | ||||
|         nodeAffinity: | ||||
|           requiredDuringSchedulingIgnoredDuringExecution: | ||||
|             nodeSelectorTerms: | ||||
|             - matchExpressions: | ||||
|               - key: cloud.google.com/gke-accelerator | ||||
|                 operator: Exists | ||||
|       tolerations: | ||||
|       - operator: "Exists" | ||||
|         effect: "NoExecute" | ||||
|       - operator: "Exists" | ||||
|         effect: "NoSchedule" | ||||
|       volumes: | ||||
|       - name: device-plugin | ||||
|         hostPath: | ||||
|           path: /var/lib/kubelet/device-plugins | ||||
|       - name: dev | ||||
|         hostPath: | ||||
|           path: /dev | ||||
|       containers: | ||||
|       - image: "registry.k8s.io/nvidia-gpu-device-plugin@sha256:4b036e8844920336fa48f36edeb7d4398f426d6a934ba022848deed2edbf09aa" | ||||
|         command: ["/usr/bin/nvidia-gpu-device-plugin", "-logtostderr"] | ||||
|         name: nvidia-gpu-device-plugin | ||||
|         resources: | ||||
|           requests: | ||||
|             cpu: 50m | ||||
|             memory: 10Mi | ||||
|           limits: | ||||
|             cpu: 50m | ||||
|             memory: 10Mi | ||||
|         securityContext: | ||||
|           privileged: true | ||||
|         volumeMounts: | ||||
|         - name: device-plugin | ||||
|           mountPath: /device-plugin | ||||
|         - name: dev | ||||
|           mountPath: /dev | ||||
|   updateStrategy: | ||||
|     type: RollingUpdate | ||||
| @@ -2943,9 +2943,6 @@ EOF | ||||
|     sed -i -e "s@{{ metrics_server_memory_per_node }}@${metrics_server_memory_per_node}@g" "${metrics_server_yaml}" | ||||
|     sed -i -e "s@{{ metrics_server_min_cluster_size }}@${metrics_server_min_cluster_size}@g" "${metrics_server_yaml}" | ||||
|   fi | ||||
|   if [[ "${ENABLE_NVIDIA_GPU_DEVICE_PLUGIN:-}" == "true" ]]; then | ||||
|     setup-addon-manifests "addons" "device-plugins/nvidia-gpu" | ||||
|   fi | ||||
|   # Setting up the konnectivity-agent daemonset | ||||
|   if [[ "${RUN_KONNECTIVITY_PODS:-false}" == "true" ]]; then | ||||
|     setup-addon-manifests "addons" "konnectivity-agent" | ||||
|   | ||||
| @@ -1512,11 +1512,6 @@ EOF | ||||
|     if [ -n "${CLUSTER_SIGNING_DURATION:-}" ]; then | ||||
|       cat >>"$file" <<EOF | ||||
| CLUSTER_SIGNING_DURATION: $(yaml-quote "${CLUSTER_SIGNING_DURATION}") | ||||
| EOF | ||||
|     fi | ||||
|     if [[ "${NODE_ACCELERATORS:-}" == *"type=nvidia"* ]]; then | ||||
|       cat >>"$file" <<EOF | ||||
| ENABLE_NVIDIA_GPU_DEVICE_PLUGIN: $(yaml-quote "true") | ||||
| EOF | ||||
|     fi | ||||
|     if [ -n "${ADDON_MANAGER_LEADER_ELECTION:-}" ]; then | ||||
|   | ||||
| @@ -1,125 +0,0 @@ | ||||
| /* | ||||
| Copyright 2017 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package autoscaling | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
| 	"strings" | ||||
| 	"time" | ||||
|  | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/kubernetes/test/e2e/feature" | ||||
| 	"k8s.io/kubernetes/test/e2e/framework" | ||||
| 	e2eautoscaling "k8s.io/kubernetes/test/e2e/framework/autoscaling" | ||||
| 	e2enode "k8s.io/kubernetes/test/e2e/framework/node" | ||||
| 	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" | ||||
| 	admissionapi "k8s.io/pod-security-admission/api" | ||||
|  | ||||
| 	"github.com/onsi/ginkgo/v2" | ||||
| 	"github.com/onsi/gomega" | ||||
| 	"github.com/onsi/gomega/gmeasure" | ||||
| ) | ||||
|  | ||||
| var _ = SIGDescribe(feature.ClusterSizeAutoscalingScaleUp, framework.WithSlow(), "Autoscaling", func() { | ||||
| 	f := framework.NewDefaultFramework("autoscaling") | ||||
| 	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged | ||||
| 	var experiment *gmeasure.Experiment | ||||
|  | ||||
| 	ginkgo.Describe("Autoscaling a service", func() { | ||||
| 		ginkgo.BeforeEach(func(ctx context.Context) { | ||||
| 			// Check if Cloud Autoscaler is enabled by trying to get its ConfigMap. | ||||
| 			_, err := f.ClientSet.CoreV1().ConfigMaps("kube-system").Get(ctx, "cluster-autoscaler-status", metav1.GetOptions{}) | ||||
| 			if err != nil { | ||||
| 				e2eskipper.Skipf("test expects Cluster Autoscaler to be enabled") | ||||
| 			} | ||||
| 			experiment = gmeasure.NewExperiment("Autoscaling a service") | ||||
| 			ginkgo.AddReportEntry(experiment.Name, experiment) | ||||
| 		}) | ||||
|  | ||||
| 		ginkgo.Context("from 1 pod and 3 nodes to 8 pods and >=4 nodes", func() { | ||||
| 			const nodesNum = 3       // Expect there to be 3 nodes before and after the test. | ||||
| 			var nodeGroupName string // Set by BeforeEach, used by AfterEach to scale this node group down after the test. | ||||
| 			var nodes *v1.NodeList   // Set by BeforeEach, used by Measure to calculate CPU request based on node's sizes. | ||||
|  | ||||
| 			ginkgo.BeforeEach(func(ctx context.Context) { | ||||
| 				// Make sure there is only 1 node group, otherwise this test becomes useless. | ||||
| 				nodeGroups := strings.Split(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") | ||||
| 				if len(nodeGroups) != 1 { | ||||
| 					e2eskipper.Skipf("test expects 1 node group, found %d", len(nodeGroups)) | ||||
| 				} | ||||
| 				nodeGroupName = nodeGroups[0] | ||||
|  | ||||
| 				// Make sure the node group has exactly 'nodesNum' nodes, otherwise this test becomes useless. | ||||
| 				nodeGroupSize, err := framework.GroupSize(nodeGroupName) | ||||
| 				framework.ExpectNoError(err) | ||||
| 				if nodeGroupSize != nodesNum { | ||||
| 					e2eskipper.Skipf("test expects %d nodes, found %d", nodesNum, nodeGroupSize) | ||||
| 				} | ||||
|  | ||||
| 				// Make sure all nodes are schedulable, otherwise we are in some kind of a problem state. | ||||
| 				nodes, err = e2enode.GetReadySchedulableNodes(ctx, f.ClientSet) | ||||
| 				framework.ExpectNoError(err) | ||||
| 				gomega.Expect(nodes.Items).To(gomega.HaveLen(nodeGroupSize), "not all nodes are schedulable") | ||||
| 			}) | ||||
|  | ||||
| 			ginkgo.AfterEach(func(ctx context.Context) { | ||||
| 				// Attempt cleanup only if a node group was targeted for scale up. | ||||
| 				// Otherwise the test was probably skipped and we'll get a gcloud error due to invalid parameters. | ||||
| 				if len(nodeGroupName) > 0 { | ||||
| 					// Scale down back to only 'nodesNum' nodes, as expected at the start of the test. | ||||
| 					framework.ExpectNoError(framework.ResizeGroup(nodeGroupName, nodesNum)) | ||||
| 					framework.ExpectNoError(e2enode.WaitForReadyNodes(ctx, f.ClientSet, nodesNum, 15*time.Minute)) | ||||
| 				} | ||||
| 			}) | ||||
|  | ||||
| 			ginkgo.It("takes less than 15 minutes", func(ctx context.Context) { | ||||
| 				// Measured over multiple samples, scaling takes 10 +/- 2 minutes, so 15 minutes should be fully sufficient. | ||||
| 				const timeToWait = 15 * time.Minute | ||||
|  | ||||
| 				// Calculate the CPU request of the service. | ||||
| 				// This test expects that 8 pods will not fit in 'nodesNum' nodes, but will fit in >='nodesNum'+1 nodes. | ||||
| 				// Make it so that 'nodesNum' pods fit perfectly per node. | ||||
| 				nodeCpus := nodes.Items[0].Status.Allocatable[v1.ResourceCPU] | ||||
| 				nodeCPUMillis := (&nodeCpus).MilliValue() | ||||
| 				cpuRequestMillis := int64(nodeCPUMillis / nodesNum) | ||||
|  | ||||
| 				// Start the service we want to scale and wait for it to be up and running. | ||||
| 				nodeMemoryBytes := nodes.Items[0].Status.Allocatable[v1.ResourceMemory] | ||||
| 				nodeMemoryMB := (&nodeMemoryBytes).Value() / 1024 / 1024 | ||||
| 				memRequestMB := nodeMemoryMB / 10 // Ensure each pod takes not more than 10% of node's allocatable memory. | ||||
| 				replicas := 1 | ||||
| 				resourceConsumer := e2eautoscaling.NewDynamicResourceConsumer(ctx, "resource-consumer", f.Namespace.Name, e2eautoscaling.KindDeployment, replicas, 0, 0, 0, cpuRequestMillis, memRequestMB, f.ClientSet, f.ScalesGetter, e2eautoscaling.Disable, e2eautoscaling.Idle) | ||||
| 				ginkgo.DeferCleanup(resourceConsumer.CleanUp) | ||||
| 				resourceConsumer.WaitForReplicas(ctx, replicas, 1*time.Minute) // Should finish ~immediately, so 1 minute is more than enough. | ||||
|  | ||||
| 				// Enable Horizontal Pod Autoscaler with 50% target utilization and | ||||
| 				// scale up the CPU usage to trigger autoscaling to 8 pods for target to be satisfied. | ||||
| 				targetCPUUtilizationPercent := int32(50) | ||||
| 				hpa := e2eautoscaling.CreateCPUResourceHorizontalPodAutoscaler(ctx, resourceConsumer, targetCPUUtilizationPercent, 1, 10) | ||||
| 				ginkgo.DeferCleanup(e2eautoscaling.DeleteHorizontalPodAutoscaler, resourceConsumer, hpa.Name) | ||||
| 				cpuLoad := 8 * cpuRequestMillis * int64(targetCPUUtilizationPercent) / 100 // 8 pods utilized to the target level | ||||
| 				resourceConsumer.ConsumeCPU(int(cpuLoad)) | ||||
|  | ||||
| 				// Measure the time it takes for the service to scale to 8 pods with 50% CPU utilization each. | ||||
| 				experiment.SampleDuration("total scale-up time", func(idx int) { | ||||
| 					resourceConsumer.WaitForReplicas(ctx, 8, timeToWait) | ||||
| 				}, gmeasure.SamplingConfig{N: 1}) | ||||
| 			}) // Increase to run the test more than once. | ||||
| 		}) | ||||
| 	}) | ||||
| }) | ||||
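For reference, the removed test above sizes pod requests and CPU load from a node's allocatable CPU: each pod requests 1/nodesNum of a node ("make it so that 'nodesNum' pods fit perfectly per node"), and the generated load is whatever keeps 8 pods at the 50% HPA target. A minimal standalone sketch of that arithmetic, assuming a hypothetical node with 4000m allocatable CPU (the 4000m figure is an assumption, not a value from the test):

package main

import "fmt"

func main() {
	// Hypothetical node size: 4000m allocatable CPU (assumed for illustration).
	nodeCPUMillis := int64(4000)
	const nodesNum = 3 // the removed test expects 3 nodes before and after

	// Each pod requests 1/nodesNum of a node's allocatable CPU, mirroring the
	// removed test's sizing ("nodesNum pods fit perfectly per node").
	cpuRequestMillis := nodeCPUMillis / nodesNum // 1333m

	// CPU load that keeps 8 such pods at the 50% HPA target utilization.
	targetCPUUtilizationPercent := int64(50)
	cpuLoad := 8 * cpuRequestMillis * targetCPUUtilizationPercent / 100 // 5332m

	fmt.Println(cpuRequestMillis, cpuLoad) // 1333 5332
}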
| @@ -1,531 +0,0 @@ | ||||
| /* | ||||
| Copyright 2016 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package autoscaling | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"math" | ||||
| 	"strings" | ||||
| 	"time" | ||||
|  | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/apimachinery/pkg/fields" | ||||
| 	"k8s.io/apimachinery/pkg/types" | ||||
| 	"k8s.io/apimachinery/pkg/util/strategicpatch" | ||||
| 	clientset "k8s.io/client-go/kubernetes" | ||||
| 	"k8s.io/klog/v2" | ||||
| 	"k8s.io/kubernetes/test/e2e/feature" | ||||
| 	"k8s.io/kubernetes/test/e2e/framework" | ||||
| 	e2enode "k8s.io/kubernetes/test/e2e/framework/node" | ||||
| 	e2erc "k8s.io/kubernetes/test/e2e/framework/rc" | ||||
| 	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" | ||||
| 	testutils "k8s.io/kubernetes/test/utils" | ||||
| 	imageutils "k8s.io/kubernetes/test/utils/image" | ||||
| 	admissionapi "k8s.io/pod-security-admission/api" | ||||
|  | ||||
| 	"github.com/onsi/ginkgo/v2" | ||||
| 	"github.com/onsi/gomega" | ||||
| ) | ||||
|  | ||||
| const ( | ||||
| 	memoryReservationTimeout = 5 * time.Minute | ||||
| 	largeResizeTimeout       = 8 * time.Minute | ||||
| 	largeScaleUpTimeout      = 10 * time.Minute | ||||
| 	maxNodes                 = 1000 | ||||
| ) | ||||
|  | ||||
| type clusterPredicates struct { | ||||
| 	nodes int | ||||
| } | ||||
|  | ||||
| type scaleUpTestConfig struct { | ||||
| 	initialNodes   int | ||||
| 	initialPods    int | ||||
| 	extraPods      *testutils.RCConfig | ||||
| 	expectedResult *clusterPredicates | ||||
| } | ||||
|  | ||||
| var _ = SIGDescribe("Cluster size autoscaler scalability", framework.WithSlow(), func() { | ||||
| 	f := framework.NewDefaultFramework("autoscaling") | ||||
| 	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged | ||||
| 	var c clientset.Interface | ||||
| 	var nodeCount int | ||||
| 	var coresPerNode int | ||||
| 	var memCapacityMb int | ||||
| 	var originalSizes map[string]int | ||||
| 	var sum int | ||||
|  | ||||
| 	ginkgo.BeforeEach(func(ctx context.Context) { | ||||
| 		e2eskipper.SkipUnlessProviderIs("gce", "gke", "kubemark") | ||||
|  | ||||
| 		// Check if Cloud Autoscaler is enabled by trying to get its ConfigMap. | ||||
| 		_, err := f.ClientSet.CoreV1().ConfigMaps("kube-system").Get(ctx, "cluster-autoscaler-status", metav1.GetOptions{}) | ||||
| 		if err != nil { | ||||
| 			e2eskipper.Skipf("test expects Cluster Autoscaler to be enabled") | ||||
| 		} | ||||
|  | ||||
| 		c = f.ClientSet | ||||
| 		if originalSizes == nil { | ||||
| 			originalSizes = make(map[string]int) | ||||
| 			sum = 0 | ||||
| 			for _, mig := range strings.Split(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") { | ||||
| 				size, err := framework.GroupSize(mig) | ||||
| 				framework.ExpectNoError(err) | ||||
| 				ginkgo.By(fmt.Sprintf("Initial size of %s: %d", mig, size)) | ||||
| 				originalSizes[mig] = size | ||||
| 				sum += size | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		framework.ExpectNoError(e2enode.WaitForReadyNodes(ctx, c, sum, scaleUpTimeout)) | ||||
|  | ||||
| 		nodes, err := e2enode.GetReadySchedulableNodes(ctx, f.ClientSet) | ||||
| 		framework.ExpectNoError(err) | ||||
| 		nodeCount = len(nodes.Items) | ||||
| 		cpu := nodes.Items[0].Status.Capacity[v1.ResourceCPU] | ||||
| 		mem := nodes.Items[0].Status.Capacity[v1.ResourceMemory] | ||||
| 		coresPerNode = int((&cpu).MilliValue() / 1000) | ||||
| 		memCapacityMb = int((&mem).Value() / 1024 / 1024) | ||||
|  | ||||
| 		gomega.Expect(nodeCount).To(gomega.Equal(sum)) | ||||
|  | ||||
| 		if framework.ProviderIs("gke") { | ||||
| 			val, err := isAutoscalerEnabled(3) | ||||
| 			framework.ExpectNoError(err) | ||||
| 			if !val { | ||||
| 				err = enableAutoscaler("default-pool", 3, 5) | ||||
| 				framework.ExpectNoError(err) | ||||
| 			} | ||||
| 		} | ||||
| 	}) | ||||
|  | ||||
| 	ginkgo.AfterEach(func(ctx context.Context) { | ||||
| 		ginkgo.By(fmt.Sprintf("Restoring initial size of the cluster")) | ||||
| 		setMigSizes(originalSizes) | ||||
| 		framework.ExpectNoError(e2enode.WaitForReadyNodes(ctx, c, nodeCount, scaleDownTimeout)) | ||||
| 		nodes, err := c.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) | ||||
| 		framework.ExpectNoError(err) | ||||
| 		s := time.Now() | ||||
| 	makeSchedulableLoop: | ||||
| 		for start := time.Now(); time.Since(start) < makeSchedulableTimeout; time.Sleep(makeSchedulableDelay) { | ||||
| 			for _, n := range nodes.Items { | ||||
| 				err = makeNodeSchedulable(ctx, c, &n, true) | ||||
| 				switch err.(type) { | ||||
| 				case CriticalAddonsOnlyError: | ||||
| 					continue makeSchedulableLoop | ||||
| 				default: | ||||
| 					framework.ExpectNoError(err) | ||||
| 				} | ||||
| 			} | ||||
| 			break | ||||
| 		} | ||||
| 		klog.Infof("Made nodes schedulable again in %v", time.Since(s).String()) | ||||
| 	}) | ||||
|  | ||||
| 	f.It("should scale up at all", feature.ClusterAutoscalerScalability1, func(ctx context.Context) { | ||||
| 		perNodeReservation := int(float64(memCapacityMb) * 0.95) | ||||
| 		replicasPerNode := 10 | ||||
|  | ||||
| 		additionalNodes := maxNodes - nodeCount | ||||
| 		replicas := additionalNodes * replicasPerNode | ||||
| 		additionalReservation := additionalNodes * perNodeReservation | ||||
|  | ||||
| 		// saturate cluster | ||||
| 		reservationCleanup := ReserveMemory(ctx, f, "some-pod", nodeCount*2, nodeCount*perNodeReservation, true, memoryReservationTimeout) | ||||
| 		defer reservationCleanup() | ||||
| 		framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) | ||||
|  | ||||
| 		// configure pending pods & expected scale up | ||||
| 		rcConfig := reserveMemoryRCConfig(f, "extra-pod-1", replicas, additionalReservation, largeScaleUpTimeout) | ||||
| 		expectedResult := createClusterPredicates(nodeCount + additionalNodes) | ||||
| 		config := createScaleUpTestConfig(nodeCount, nodeCount, rcConfig, expectedResult) | ||||
|  | ||||
| 		// run test | ||||
| 		testCleanup := simpleScaleUpTest(ctx, f, config) | ||||
| 		defer testCleanup() | ||||
| 	}) | ||||
|  | ||||
| 	f.It("should scale up twice", feature.ClusterAutoscalerScalability2, func(ctx context.Context) { | ||||
| 		perNodeReservation := int(float64(memCapacityMb) * 0.95) | ||||
| 		replicasPerNode := 10 | ||||
| 		additionalNodes1 := int(math.Ceil(0.7 * maxNodes)) | ||||
| 		additionalNodes2 := int(math.Ceil(0.25 * maxNodes)) | ||||
| 		if additionalNodes1+additionalNodes2 > maxNodes { | ||||
| 			additionalNodes2 = maxNodes - additionalNodes1 | ||||
| 		} | ||||
|  | ||||
| 		replicas1 := additionalNodes1 * replicasPerNode | ||||
| 		replicas2 := additionalNodes2 * replicasPerNode | ||||
|  | ||||
| 		klog.Infof("cores per node: %v", coresPerNode) | ||||
|  | ||||
| 		// saturate cluster | ||||
| 		initialReplicas := nodeCount | ||||
| 		reservationCleanup := ReserveMemory(ctx, f, "some-pod", initialReplicas, nodeCount*perNodeReservation, true, memoryReservationTimeout) | ||||
| 		defer reservationCleanup() | ||||
| 		framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) | ||||
|  | ||||
| 		klog.Infof("Reserved successfully") | ||||
|  | ||||
| 		// configure pending pods & expected scale up #1 | ||||
| 		rcConfig := reserveMemoryRCConfig(f, "extra-pod-1", replicas1, additionalNodes1*perNodeReservation, largeScaleUpTimeout) | ||||
| 		expectedResult := createClusterPredicates(nodeCount + additionalNodes1) | ||||
| 		config := createScaleUpTestConfig(nodeCount, nodeCount, rcConfig, expectedResult) | ||||
|  | ||||
| 		// run test #1 | ||||
| 		tolerateUnreadyNodes := additionalNodes1 / 20 | ||||
| 		tolerateUnreadyPods := (initialReplicas + replicas1) / 20 | ||||
| 		testCleanup1 := simpleScaleUpTestWithTolerance(ctx, f, config, tolerateUnreadyNodes, tolerateUnreadyPods) | ||||
| 		defer testCleanup1() | ||||
|  | ||||
| 		klog.Infof("Scaled up once") | ||||
|  | ||||
| 		// configure pending pods & expected scale up #2 | ||||
| 		rcConfig2 := reserveMemoryRCConfig(f, "extra-pod-2", replicas2, additionalNodes2*perNodeReservation, largeScaleUpTimeout) | ||||
| 		expectedResult2 := createClusterPredicates(nodeCount + additionalNodes1 + additionalNodes2) | ||||
| 		config2 := createScaleUpTestConfig(nodeCount+additionalNodes1, nodeCount+additionalNodes2, rcConfig2, expectedResult2) | ||||
|  | ||||
| 		// run test #2 | ||||
| 		tolerateUnreadyNodes = maxNodes / 20 | ||||
| 		tolerateUnreadyPods = (initialReplicas + replicas1 + replicas2) / 20 | ||||
| 		testCleanup2 := simpleScaleUpTestWithTolerance(ctx, f, config2, tolerateUnreadyNodes, tolerateUnreadyPods) | ||||
| 		defer testCleanup2() | ||||
|  | ||||
| 		klog.Infof("Scaled up twice") | ||||
| 	}) | ||||
|  | ||||
| 	f.It("should scale down empty nodes", feature.ClusterAutoscalerScalability3, func(ctx context.Context) { | ||||
| 		perNodeReservation := int(float64(memCapacityMb) * 0.7) | ||||
| 		replicas := int(math.Ceil(maxNodes * 0.7)) | ||||
| 		totalNodes := maxNodes | ||||
|  | ||||
| 		// resize cluster to totalNodes | ||||
| 		newSizes := map[string]int{ | ||||
| 			anyKey(originalSizes): totalNodes, | ||||
| 		} | ||||
| 		setMigSizes(newSizes) | ||||
| 		framework.ExpectNoError(e2enode.WaitForReadyNodes(ctx, f.ClientSet, totalNodes, largeResizeTimeout)) | ||||
|  | ||||
| 		// run replicas | ||||
| 		rcConfig := reserveMemoryRCConfig(f, "some-pod", replicas, replicas*perNodeReservation, largeScaleUpTimeout) | ||||
| 		expectedResult := createClusterPredicates(totalNodes) | ||||
| 		config := createScaleUpTestConfig(totalNodes, totalNodes, rcConfig, expectedResult) | ||||
| 		tolerateUnreadyNodes := totalNodes / 10 | ||||
| 		tolerateUnreadyPods := replicas / 10 | ||||
| 		testCleanup := simpleScaleUpTestWithTolerance(ctx, f, config, tolerateUnreadyNodes, tolerateUnreadyPods) | ||||
| 		defer testCleanup() | ||||
|  | ||||
| 		// check if empty nodes are scaled down | ||||
| 		framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, | ||||
| 			func(size int) bool { | ||||
| 				return size <= replicas+3 // leaving space for non-evictable kube-system pods | ||||
| 			}, scaleDownTimeout)) | ||||
| 	}) | ||||
|  | ||||
| 	f.It("should scale down underutilized nodes", feature.ClusterAutoscalerScalability4, func(ctx context.Context) { | ||||
| 		perPodReservation := int(float64(memCapacityMb) * 0.01) | ||||
| 		// underutilizedNodes are 10% full | ||||
| 		underutilizedPerNodeReplicas := 10 | ||||
| 		// fullNodes are 70% full | ||||
| 		fullPerNodeReplicas := 70 | ||||
| 		totalNodes := maxNodes | ||||
| 		underutilizedRatio := 0.3 | ||||
| 		maxDelta := 30 | ||||
|  | ||||
| 		// resize cluster to totalNodes | ||||
| 		newSizes := map[string]int{ | ||||
| 			anyKey(originalSizes): totalNodes, | ||||
| 		} | ||||
| 		setMigSizes(newSizes) | ||||
|  | ||||
| 		framework.ExpectNoError(e2enode.WaitForReadyNodes(ctx, f.ClientSet, totalNodes, largeResizeTimeout)) | ||||
|  | ||||
| 		// annotate all nodes with no-scale-down | ||||
| 		ScaleDownDisabledKey := "cluster-autoscaler.kubernetes.io/scale-down-disabled" | ||||
|  | ||||
| 		nodes, err := f.ClientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{ | ||||
| 			FieldSelector: fields.Set{ | ||||
| 				"spec.unschedulable": "false", | ||||
| 			}.AsSelector().String(), | ||||
| 		}) | ||||
|  | ||||
| 		framework.ExpectNoError(err) | ||||
| 		framework.ExpectNoError(addAnnotation(ctx, f, nodes.Items, ScaleDownDisabledKey, "true")) | ||||
|  | ||||
| 		// distribute pods using replication controllers taking up space that should | ||||
| 		// be empty after pods are distributed | ||||
| 		underutilizedNodesNum := int(float64(maxNodes) * underutilizedRatio) | ||||
| 		fullNodesNum := totalNodes - underutilizedNodesNum | ||||
|  | ||||
| 		podDistribution := []podBatch{ | ||||
| 			{numNodes: fullNodesNum, podsPerNode: fullPerNodeReplicas}, | ||||
| 			{numNodes: underutilizedNodesNum, podsPerNode: underutilizedPerNodeReplicas}} | ||||
|  | ||||
| 		distributeLoad(ctx, f, f.Namespace.Name, "10-70", podDistribution, perPodReservation, | ||||
| 			int(0.95*float64(memCapacityMb)), map[string]string{}, largeScaleUpTimeout) | ||||
|  | ||||
| 		// enable scale down again | ||||
| 		framework.ExpectNoError(addAnnotation(ctx, f, nodes.Items, ScaleDownDisabledKey, "false")) | ||||
|  | ||||
| 		// wait for scale down to start. Node deletion takes a long time, so we just | ||||
| 		// wait for a maximum of 30 nodes to be deleted | ||||
| 		nodesToScaleDownCount := int(float64(totalNodes) * 0.1) | ||||
| 		if nodesToScaleDownCount > maxDelta { | ||||
| 			nodesToScaleDownCount = maxDelta | ||||
| 		} | ||||
| 		expectedSize := totalNodes - nodesToScaleDownCount | ||||
| 		timeout := time.Duration(nodesToScaleDownCount)*time.Minute + scaleDownTimeout | ||||
| 		framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, func(size int) bool { | ||||
| 			return size <= expectedSize | ||||
| 		}, timeout)) | ||||
| 	}) | ||||
|  | ||||
| 	f.It("shouldn't scale down with underutilized nodes due to host port conflicts", feature.ClusterAutoscalerScalability5, func(ctx context.Context) { | ||||
| 		fullReservation := int(float64(memCapacityMb) * 0.9) | ||||
| 		hostPortPodReservation := int(float64(memCapacityMb) * 0.3) | ||||
| 		totalNodes := maxNodes | ||||
| 		reservedPort := 4321 | ||||
|  | ||||
| 		// resize cluster to totalNodes | ||||
| 		newSizes := map[string]int{ | ||||
| 			anyKey(originalSizes): totalNodes, | ||||
| 		} | ||||
| 		setMigSizes(newSizes) | ||||
| 		framework.ExpectNoError(e2enode.WaitForReadyNodes(ctx, f.ClientSet, totalNodes, largeResizeTimeout)) | ||||
| 		divider := int(float64(totalNodes) * 0.7) | ||||
| 		fullNodesCount := divider | ||||
| 		underutilizedNodesCount := totalNodes - fullNodesCount | ||||
|  | ||||
| 		ginkgo.By("Reserving full nodes") | ||||
| 		// run RC1 w/o host port | ||||
| 		cleanup := ReserveMemory(ctx, f, "filling-pod", fullNodesCount, fullNodesCount*fullReservation, true, largeScaleUpTimeout*2) | ||||
| 		defer cleanup() | ||||
|  | ||||
| 		ginkgo.By("Reserving host ports on remaining nodes") | ||||
| 		// run RC2 w/ host port | ||||
| 		ginkgo.DeferCleanup(createHostPortPodsWithMemory, f, "underutilizing-host-port-pod", underutilizedNodesCount, reservedPort, underutilizedNodesCount*hostPortPodReservation, largeScaleUpTimeout) | ||||
|  | ||||
| 		framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) | ||||
| 		// wait and check scale down doesn't occur | ||||
| 		ginkgo.By(fmt.Sprintf("Sleeping %v minutes...", scaleDownTimeout.Minutes())) | ||||
| 		time.Sleep(scaleDownTimeout) | ||||
|  | ||||
| 		ginkgo.By("Checking if the number of nodes is as expected") | ||||
| 		nodes, err := e2enode.GetReadySchedulableNodes(ctx, f.ClientSet) | ||||
| 		framework.ExpectNoError(err) | ||||
| 		klog.Infof("Nodes: %v, expected: %v", len(nodes.Items), totalNodes) | ||||
| 		gomega.Expect(nodes.Items).To(gomega.HaveLen(totalNodes)) | ||||
| 	}) | ||||
|  | ||||
| 	f.It("CA ignores unschedulable pods while scheduling schedulable pods", feature.ClusterAutoscalerScalability6, func(ctx context.Context) { | ||||
| 		// Start a number of pods saturating existing nodes. | ||||
| 		perNodeReservation := int(float64(memCapacityMb) * 0.80) | ||||
| 		replicasPerNode := 10 | ||||
| 		initialPodReplicas := nodeCount * replicasPerNode | ||||
| 		initialPodsTotalMemory := nodeCount * perNodeReservation | ||||
| 		reservationCleanup := ReserveMemory(ctx, f, "initial-pod", initialPodReplicas, initialPodsTotalMemory, true /* wait for pods to run */, memoryReservationTimeout) | ||||
| 		ginkgo.DeferCleanup(reservationCleanup) | ||||
| 		framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, c)) | ||||
|  | ||||
| 		// Configure a number of unschedulable pods. | ||||
| 		unschedulableMemReservation := memCapacityMb * 2 | ||||
| 		unschedulablePodReplicas := 1000 | ||||
| 		totalMemReservation := unschedulableMemReservation * unschedulablePodReplicas | ||||
| 		timeToWait := 5 * time.Minute | ||||
| 		podsConfig := reserveMemoryRCConfig(f, "unschedulable-pod", unschedulablePodReplicas, totalMemReservation, timeToWait) | ||||
| 		_ = e2erc.RunRC(ctx, *podsConfig) // Ignore error (it will occur because pods are unschedulable) | ||||
| 		ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, podsConfig.Name) | ||||
|  | ||||
| 		// Ensure that no new nodes have been added so far. | ||||
| 		readyNodeCount, _ := e2enode.TotalReady(ctx, f.ClientSet) | ||||
| 		gomega.Expect(readyNodeCount).To(gomega.Equal(nodeCount)) | ||||
|  | ||||
| 		// Start a number of schedulable pods to ensure CA reacts. | ||||
| 		additionalNodes := maxNodes - nodeCount | ||||
| 		replicas := additionalNodes * replicasPerNode | ||||
| 		totalMemory := additionalNodes * perNodeReservation | ||||
| 		rcConfig := reserveMemoryRCConfig(f, "extra-pod", replicas, totalMemory, largeScaleUpTimeout) | ||||
| 		expectedResult := createClusterPredicates(nodeCount + additionalNodes) | ||||
| 		config := createScaleUpTestConfig(nodeCount, initialPodReplicas, rcConfig, expectedResult) | ||||
|  | ||||
| 		// Test that scale up happens, allowing 1000 unschedulable pods not to be scheduled. | ||||
| 		testCleanup := simpleScaleUpTestWithTolerance(ctx, f, config, 0, unschedulablePodReplicas) | ||||
| 		ginkgo.DeferCleanup(testCleanup) | ||||
| 	}) | ||||
|  | ||||
| }) | ||||
|  | ||||
| func anyKey(input map[string]int) string { | ||||
| 	for k := range input { | ||||
| 		return k | ||||
| 	} | ||||
| 	return "" | ||||
| } | ||||
|  | ||||
| func simpleScaleUpTestWithTolerance(ctx context.Context, f *framework.Framework, config *scaleUpTestConfig, tolerateMissingNodeCount int, tolerateMissingPodCount int) func() error { | ||||
| 	// resize cluster to start size | ||||
| 	// run rc based on config | ||||
| 	ginkgo.By(fmt.Sprintf("Running RC %v from config", config.extraPods.Name)) | ||||
| 	start := time.Now() | ||||
| 	framework.ExpectNoError(e2erc.RunRC(ctx, *config.extraPods)) | ||||
| 	// check results | ||||
| 	if tolerateMissingNodeCount > 0 { | ||||
| 		// Tolerate some number of nodes not to be created. | ||||
| 		minExpectedNodeCount := config.expectedResult.nodes - tolerateMissingNodeCount | ||||
| 		framework.ExpectNoError(WaitForClusterSizeFunc(ctx, f.ClientSet, | ||||
| 			func(size int) bool { return size >= minExpectedNodeCount }, scaleUpTimeout)) | ||||
| 	} else { | ||||
| 		framework.ExpectNoError(e2enode.WaitForReadyNodes(ctx, f.ClientSet, config.expectedResult.nodes, scaleUpTimeout)) | ||||
| 	} | ||||
| 	klog.Infof("cluster is increased") | ||||
| 	if tolerateMissingPodCount > 0 { | ||||
| 		framework.ExpectNoError(waitForCaPodsReadyInNamespace(ctx, f, f.ClientSet, tolerateMissingPodCount)) | ||||
| 	} else { | ||||
| 		framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, f.ClientSet)) | ||||
| 	} | ||||
| 	timeTrack(start, fmt.Sprintf("Scale up to %v", config.expectedResult.nodes)) | ||||
| 	return func() error { | ||||
| 		return e2erc.DeleteRCAndWaitForGC(ctx, f.ClientSet, f.Namespace.Name, config.extraPods.Name) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func simpleScaleUpTest(ctx context.Context, f *framework.Framework, config *scaleUpTestConfig) func() error { | ||||
| 	return simpleScaleUpTestWithTolerance(ctx, f, config, 0, 0) | ||||
| } | ||||
|  | ||||
| func reserveMemoryRCConfig(f *framework.Framework, id string, replicas, megabytes int, timeout time.Duration) *testutils.RCConfig { | ||||
| 	return &testutils.RCConfig{ | ||||
| 		Client:     f.ClientSet, | ||||
| 		Name:       id, | ||||
| 		Namespace:  f.Namespace.Name, | ||||
| 		Timeout:    timeout, | ||||
| 		Image:      imageutils.GetPauseImageName(), | ||||
| 		Replicas:   replicas, | ||||
| 		MemRequest: int64(1024 * 1024 * megabytes / replicas), | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func createScaleUpTestConfig(nodes, pods int, extraPods *testutils.RCConfig, expectedResult *clusterPredicates) *scaleUpTestConfig { | ||||
| 	return &scaleUpTestConfig{ | ||||
| 		initialNodes:   nodes, | ||||
| 		initialPods:    pods, | ||||
| 		extraPods:      extraPods, | ||||
| 		expectedResult: expectedResult, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func createClusterPredicates(nodes int) *clusterPredicates { | ||||
| 	return &clusterPredicates{ | ||||
| 		nodes: nodes, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func addAnnotation(ctx context.Context, f *framework.Framework, nodes []v1.Node, key, value string) error { | ||||
| 	for _, node := range nodes { | ||||
| 		oldData, err := json.Marshal(node) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		if node.Annotations == nil { | ||||
| 			node.Annotations = make(map[string]string) | ||||
| 		} | ||||
| 		node.Annotations[key] = value | ||||
|  | ||||
| 		newData, err := json.Marshal(node) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, v1.Node{}) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		_, err = f.ClientSet.CoreV1().Nodes().Patch(ctx, string(node.Name), types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func createHostPortPodsWithMemory(ctx context.Context, f *framework.Framework, id string, replicas, port, megabytes int, timeout time.Duration) func() error { | ||||
| 	ginkgo.By(fmt.Sprintf("Running RC which reserves host port and memory")) | ||||
| 	request := int64(1024 * 1024 * megabytes / replicas) | ||||
| 	config := &testutils.RCConfig{ | ||||
| 		Client:     f.ClientSet, | ||||
| 		Name:       id, | ||||
| 		Namespace:  f.Namespace.Name, | ||||
| 		Timeout:    timeout, | ||||
| 		Image:      imageutils.GetPauseImageName(), | ||||
| 		Replicas:   replicas, | ||||
| 		HostPorts:  map[string]int{"port1": port}, | ||||
| 		MemRequest: request, | ||||
| 	} | ||||
| 	err := e2erc.RunRC(ctx, *config) | ||||
| 	framework.ExpectNoError(err) | ||||
| 	return func() error { | ||||
| 		return e2erc.DeleteRCAndWaitForGC(ctx, f.ClientSet, f.Namespace.Name, id) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| type podBatch struct { | ||||
| 	numNodes    int | ||||
| 	podsPerNode int | ||||
| } | ||||
|  | ||||
| // distributeLoad distributes the pods in the way described by podDistribution, | ||||
| // assuming all pods will have the same memory reservation and all nodes the same | ||||
| // memory capacity. This allows us to generate the load on the cluster in the exact | ||||
| // way that we want. | ||||
| // | ||||
| // To achieve this we do the following: | ||||
| // 1. Create replication controllers that eat up all the space that should be | ||||
| // empty after setup, making sure they end up on different nodes by specifying | ||||
| // conflicting host port | ||||
| // 2. Create target RC that will generate the load on the cluster | ||||
| // 3. Remove the rcs created in 1. | ||||
| func distributeLoad(ctx context.Context, f *framework.Framework, namespace string, id string, podDistribution []podBatch, | ||||
| 	podMemRequestMegabytes int, nodeMemCapacity int, labels map[string]string, timeout time.Duration) { | ||||
| 	port := 8013 | ||||
| 	// Create load-distribution RCs with one pod per node, reserving all remaining | ||||
| 	// memory to force the distribution of pods for the target RCs. | ||||
| 	// The load-distribution RCs will be deleted on function return. | ||||
| 	totalPods := 0 | ||||
| 	for i, podBatch := range podDistribution { | ||||
| 		totalPods += podBatch.numNodes * podBatch.podsPerNode | ||||
| 		remainingMem := nodeMemCapacity - podBatch.podsPerNode*podMemRequestMegabytes | ||||
| 		replicas := podBatch.numNodes | ||||
| 		cleanup := createHostPortPodsWithMemory(ctx, f, fmt.Sprintf("load-distribution%d", i), replicas, port, remainingMem*replicas, timeout) | ||||
| 		defer cleanup() | ||||
| 	} | ||||
| 	framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, f.ClientSet)) | ||||
| 	// Create the target RC | ||||
| 	rcConfig := reserveMemoryRCConfig(f, id, totalPods, totalPods*podMemRequestMegabytes, timeout) | ||||
| 	framework.ExpectNoError(e2erc.RunRC(ctx, *rcConfig)) | ||||
| 	framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(ctx, f, f.ClientSet)) | ||||
| 	ginkgo.DeferCleanup(e2erc.DeleteRCAndWaitForGC, f.ClientSet, f.Namespace.Name, id) | ||||
| } | ||||
|  | ||||
| func timeTrack(start time.Time, name string) { | ||||
| 	elapsed := time.Since(start) | ||||
| 	klog.Infof("%s took %s", name, elapsed) | ||||
| } | ||||
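The scale-down cases above rely on distributeLoad to pack nodes to a chosen utilization: for each batch it runs one host-port "filler" pod per node that reserves whatever memory the batch's pods leave free, then starts the target RC, and the fillers are removed afterwards. A minimal sketch of that sizing, where the 7500 MB node capacity is an assumption and the 0.95 / 0.01 / 70 / 10 factors come from the removed "scale down underutilized nodes" test:

package main

import "fmt"

func main() {
	// Hypothetical node memory capacity in MB (assumed for illustration).
	memCapacityMb := 7500

	// Factors taken from the removed "scale down underutilized nodes" test.
	perPodReservation := int(float64(memCapacityMb) * 0.01) // each load pod reserves ~1% of a node
	nodeMemCapacity := int(0.95 * float64(memCapacityMb))   // capacity handed to distributeLoad
	fullPerNodeReplicas := 70                               // "full" nodes run 70 load pods
	underutilizedPerNodeReplicas := 10                      // "underutilized" nodes run 10 load pods

	// distributeLoad gives each node one filler pod (kept apart by a conflicting
	// host port) that reserves the memory the batch's pods leave free.
	remainingFull := nodeMemCapacity - fullPerNodeReplicas*perPodReservation
	remainingUnderutilized := nodeMemCapacity - underutilizedPerNodeReplicas*perPodReservation

	fmt.Println(perPodReservation, remainingFull, remainingUnderutilized) // 75 1875 6375
}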
										
											
File diff suppressed because it is too large
							| @@ -1,425 +0,0 @@ | ||||
| /* | ||||
| Copyright 2016 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package autoscaling | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
| 	"fmt" | ||||
| 	"math" | ||||
| 	"strings" | ||||
| 	"time" | ||||
|  | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	"k8s.io/apimachinery/pkg/api/resource" | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/apimachinery/pkg/labels" | ||||
| 	"k8s.io/apimachinery/pkg/util/wait" | ||||
| 	clientset "k8s.io/client-go/kubernetes" | ||||
| 	"k8s.io/kubernetes/test/e2e/framework" | ||||
| 	e2enode "k8s.io/kubernetes/test/e2e/framework/node" | ||||
| 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod" | ||||
| 	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" | ||||
| 	admissionapi "k8s.io/pod-security-admission/api" | ||||
|  | ||||
| 	"github.com/onsi/ginkgo/v2" | ||||
| ) | ||||
|  | ||||
| // This test requires coredns to be installed on the cluster with autoscaling enabled. | ||||
| // Compare your coredns manifest against the command below | ||||
| // helm template coredns -n kube-system coredns/coredns --set k8sAppLabelOverride=kube-dns --set fullnameOverride=coredns --set autoscaler.enabled=true | ||||
|  | ||||
| // Constants used in dns-autoscaling test. | ||||
| const ( | ||||
| 	DNSdefaultTimeout    = 5 * time.Minute | ||||
| 	ClusterAddonLabelKey = "k8s-app" | ||||
| 	DNSLabelName         = "kube-dns" | ||||
| ) | ||||
|  | ||||
| var _ = SIGDescribe("DNS horizontal autoscaling", func() { | ||||
| 	f := framework.NewDefaultFramework("dns-autoscaling") | ||||
| 	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged | ||||
| 	var c clientset.Interface | ||||
| 	var previousParams map[string]string | ||||
| 	var configMapNames map[string]string | ||||
| 	var originDNSReplicasCount int | ||||
| 	var DNSParams1 DNSParamsLinear | ||||
| 	var DNSParams2 DNSParamsLinear | ||||
| 	var DNSParams3 DNSParamsLinear | ||||
|  | ||||
| 	ginkgo.BeforeEach(func(ctx context.Context) { | ||||
| 		e2eskipper.SkipUnlessProviderIs("gce", "gke") | ||||
| 		c = f.ClientSet | ||||
|  | ||||
| 		nodes, err := e2enode.GetReadySchedulableNodes(ctx, c) | ||||
| 		framework.ExpectNoError(err) | ||||
| 		nodeCount := len(nodes.Items) | ||||
|  | ||||
| 		ginkgo.By("Collecting original replicas count and DNS scaling params") | ||||
|  | ||||
| 		// Check if we are running coredns or kube-dns; the only difference is the name of the autoscaling CM. | ||||
| 		// The test should behave identically on both DNS providers. | ||||
| 		provider, err := detectDNSProvider(ctx, c) | ||||
| 		framework.ExpectNoError(err) | ||||
|  | ||||
| 		originDNSReplicasCount, err = getDNSReplicas(ctx, c) | ||||
| 		framework.ExpectNoError(err) | ||||
| 		configMapNames = map[string]string{ | ||||
| 			"kube-dns": "kube-dns-autoscaler", | ||||
| 			"coredns":  "coredns-autoscaler", | ||||
| 		} | ||||
|  | ||||
| 		pcm, err := fetchDNSScalingConfigMap(ctx, c, configMapNames[provider]) | ||||
| 		framework.Logf("original DNS scaling params: %v", pcm) | ||||
| 		framework.ExpectNoError(err) | ||||
| 		previousParams = pcm.Data | ||||
|  | ||||
| 		if nodeCount <= 500 { | ||||
| 			DNSParams1 = DNSParamsLinear{ | ||||
| 				nodesPerReplica: 1, | ||||
| 			} | ||||
| 			DNSParams2 = DNSParamsLinear{ | ||||
| 				nodesPerReplica: 2, | ||||
| 			} | ||||
| 			DNSParams3 = DNSParamsLinear{ | ||||
| 				nodesPerReplica: 3, | ||||
| 				coresPerReplica: 3, | ||||
| 			} | ||||
| 		} else { | ||||
| 			// In large clusters, avoid creating/deleting too many DNS pods, | ||||
| 			// it is supposed to be a correctness test, not a performance one. | ||||
| 			// The default setup is: 256 cores/replica, 16 nodes/replica. | ||||
| 			// With nodeCount > 500, nodes/13, nodes/14, nodes/15 and nodes/16 | ||||
| 			// are different numbers. | ||||
| 			DNSParams1 = DNSParamsLinear{ | ||||
| 				nodesPerReplica: 13, | ||||
| 			} | ||||
| 			DNSParams2 = DNSParamsLinear{ | ||||
| 				nodesPerReplica: 14, | ||||
| 			} | ||||
| 			DNSParams3 = DNSParamsLinear{ | ||||
| 				nodesPerReplica: 15, | ||||
| 				coresPerReplica: 15, | ||||
| 			} | ||||
| 		} | ||||
| 	}) | ||||
|  | ||||
| 	// This test is separated because it is slow and needs to run serially. | ||||
| 	// It will take around 5 minutes to run on a 4-node cluster. | ||||
| 	// TODO(upodroid) This test will be removed in 1.33 when kubeup is removed | ||||
| 	// TODO: make it cloud provider agnostic or move it to cloud-provider-gcp repository | ||||
| 	f.It(f.WithSerial(), f.WithSlow(), f.WithLabel("KubeUp"), f.WithLabel("sig-cloud-provider-gcp"), "kube-dns-autoscaler should scale kube-dns pods when cluster size changed", func(ctx context.Context) { | ||||
| 		numNodes, err := e2enode.TotalRegistered(ctx, c) | ||||
| 		framework.ExpectNoError(err) | ||||
|  | ||||
| 		configMapNames = map[string]string{ | ||||
| 			"kube-dns": "kube-dns-autoscaler", | ||||
| 			"coredns":  "coredns-autoscaler", | ||||
| 		} | ||||
| 		provider, err := detectDNSProvider(ctx, c) | ||||
| 		framework.ExpectNoError(err) | ||||
|  | ||||
| 		ginkgo.By("Replace the dns autoscaling parameters with testing parameters") | ||||
| 		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams1))) | ||||
| 		framework.ExpectNoError(err) | ||||
| 		defer func() { | ||||
| 			ginkgo.By("Restoring initial dns autoscaling parameters") | ||||
| 			err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], previousParams)) | ||||
| 			framework.ExpectNoError(err) | ||||
|  | ||||
| 			ginkgo.By("Wait for the number of running and ready kube-dns pods to recover") | ||||
| 			label := labels.SelectorFromSet(labels.Set(map[string]string{ClusterAddonLabelKey: DNSLabelName})) | ||||
| 			_, err := e2epod.WaitForPodsWithLabelRunningReady(ctx, c, metav1.NamespaceSystem, label, originDNSReplicasCount, DNSdefaultTimeout) | ||||
| 			framework.ExpectNoError(err) | ||||
| 		}() | ||||
| 		ginkgo.By("Wait for kube-dns scaled to expected number") | ||||
| 		getExpectReplicasLinear := getExpectReplicasFuncLinear(ctx, c, &DNSParams1) | ||||
| 		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) | ||||
| 		framework.ExpectNoError(err) | ||||
|  | ||||
| 		originalSizes := make(map[string]int) | ||||
| 		for _, mig := range strings.Split(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") { | ||||
| 			size, err := framework.GroupSize(mig) | ||||
| 			framework.ExpectNoError(err) | ||||
| 			ginkgo.By(fmt.Sprintf("Initial size of %s: %d", mig, size)) | ||||
| 			originalSizes[mig] = size | ||||
| 		} | ||||
|  | ||||
| 		ginkgo.By("Manually increase cluster size") | ||||
| 		increasedSizes := make(map[string]int) | ||||
| 		for key, val := range originalSizes { | ||||
| 			increasedSizes[key] = val + 1 | ||||
| 		} | ||||
| 		setMigSizes(increasedSizes) | ||||
| 		err = WaitForClusterSizeFunc(ctx, c, | ||||
| 			func(size int) bool { return size == numNodes+len(originalSizes) }, scaleUpTimeout) | ||||
| 		framework.ExpectNoError(err) | ||||
|  | ||||
| 		ginkgo.By("Wait for kube-dns scaled to expected number") | ||||
| 		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams1) | ||||
| 		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) | ||||
| 		framework.ExpectNoError(err) | ||||
|  | ||||
| 		ginkgo.By("Replace the dns autoscaling parameters with another testing parameters") | ||||
| 		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams3))) | ||||
| 		framework.ExpectNoError(err) | ||||
|  | ||||
| 		ginkgo.By("Wait for kube-dns scaled to expected number") | ||||
| 		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams3) | ||||
| 		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) | ||||
| 		framework.ExpectNoError(err) | ||||
|  | ||||
| 		ginkgo.By("Restoring cluster size") | ||||
| 		setMigSizes(originalSizes) | ||||
| 		err = e2enode.WaitForReadyNodes(ctx, c, numNodes, scaleDownTimeout) | ||||
| 		framework.ExpectNoError(err) | ||||
|  | ||||
| 		ginkgo.By("Wait for kube-dns scaled to expected number") | ||||
| 		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) | ||||
| 		framework.ExpectNoError(err) | ||||
| 	}) | ||||
|  | ||||
| 	ginkgo.It("kube-dns-autoscaler should scale kube-dns pods in both nonfaulty and faulty scenarios", func(ctx context.Context) { | ||||
|  | ||||
| 		configMapNames = map[string]string{ | ||||
| 			"kube-dns": "kube-dns-autoscaler", | ||||
| 			"coredns":  "coredns-autoscaler", | ||||
| 		} | ||||
| 		provider, err := detectDNSProvider(ctx, c) | ||||
| 		framework.ExpectNoError(err) | ||||
|  | ||||
| 		ginkgo.By("Replace the dns autoscaling parameters with testing parameters") | ||||
| 		cm := packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams1)) | ||||
| 		framework.Logf("Updating the following cm: %v", cm) | ||||
| 		err = updateDNSScalingConfigMap(ctx, c, cm) | ||||
| 		framework.ExpectNoError(err) | ||||
| 		defer func() { | ||||
| 			ginkgo.By("Restoring initial dns autoscaling parameters") | ||||
| 			err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], previousParams)) | ||||
| 			framework.ExpectNoError(err) | ||||
| 		}() | ||||
| 		ginkgo.By("Wait for kube-dns scaled to expected number") | ||||
| 		getExpectReplicasLinear := getExpectReplicasFuncLinear(ctx, c, &DNSParams1) | ||||
| 		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) | ||||
| 		framework.ExpectNoError(err) | ||||
|  | ||||
| 		ginkgo.By("--- Scenario: should scale kube-dns based on changed parameters ---") | ||||
| 		ginkgo.By("Replace the dns autoscaling parameters with another testing parameters") | ||||
| 		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams3))) | ||||
| 		framework.ExpectNoError(err) | ||||
| 		ginkgo.By("Wait for kube-dns scaled to expected number") | ||||
| 		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams3) | ||||
| 		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) | ||||
| 		framework.ExpectNoError(err) | ||||
|  | ||||
| 		ginkgo.By("--- Scenario: should re-create scaling parameters with default value when parameters got deleted ---") | ||||
| 		ginkgo.By("Delete the ConfigMap for autoscaler") | ||||
| 		err = deleteDNSScalingConfigMap(ctx, c, configMapNames[provider]) | ||||
| 		framework.ExpectNoError(err) | ||||
|  | ||||
| 		ginkgo.By("Wait for the ConfigMap to be re-created") | ||||
| 		_, err = waitForDNSConfigMapCreated(ctx, c, DNSdefaultTimeout, configMapNames[provider]) | ||||
| 		framework.ExpectNoError(err) | ||||
|  | ||||
| 		ginkgo.By("Replace the dns autoscaling parameters with another testing parameters") | ||||
| 		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams2))) | ||||
| 		framework.ExpectNoError(err) | ||||
| 		ginkgo.By("Wait for kube-dns/coredns scaled to expected number") | ||||
| 		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams2) | ||||
| 		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) | ||||
| 		framework.ExpectNoError(err) | ||||
|  | ||||
| 		ginkgo.By("--- Scenario: should recover after autoscaler pod got deleted ---") | ||||
| 		ginkgo.By("Delete the autoscaler pod for kube-dns/coredns") | ||||
| 		err = deleteDNSAutoscalerPod(ctx, c) | ||||
| 		framework.ExpectNoError(err) | ||||
|  | ||||
| 		ginkgo.By("Replace the dns autoscaling parameters with another testing parameters") | ||||
| 		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams1))) | ||||
| 		framework.ExpectNoError(err) | ||||
| 		ginkgo.By("Wait for kube-dns/coredns scaled to expected number") | ||||
| 		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams1) | ||||
| 		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) | ||||
| 		framework.ExpectNoError(err) | ||||
| 	}) | ||||
| }) | ||||
|  | ||||
| // DNSParamsLinear is a struct for number of DNS pods. | ||||
| type DNSParamsLinear struct { | ||||
| 	nodesPerReplica float64 | ||||
| 	coresPerReplica float64 | ||||
| 	min             int | ||||
| 	max             int | ||||
| } | ||||
|  | ||||
| type getExpectReplicasFunc func(c clientset.Interface) int | ||||
|  | ||||
| func getExpectReplicasFuncLinear(ctx context.Context, c clientset.Interface, params *DNSParamsLinear) getExpectReplicasFunc { | ||||
| 	return func(c clientset.Interface) int { | ||||
| 		var replicasFromNodes float64 | ||||
| 		var replicasFromCores float64 | ||||
| 		nodes, err := e2enode.GetReadyNodesIncludingTainted(ctx, c) | ||||
| 		framework.ExpectNoError(err) | ||||
| 		if params.nodesPerReplica > 0 { | ||||
| 			replicasFromNodes = math.Ceil(float64(len(nodes.Items)) / params.nodesPerReplica) | ||||
| 		} | ||||
| 		if params.coresPerReplica > 0 { | ||||
| 			replicasFromCores = math.Ceil(float64(getSchedulableCores(nodes.Items)) / params.coresPerReplica) | ||||
| 		} | ||||
| 		return int(math.Max(1.0, math.Max(replicasFromNodes, replicasFromCores))) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func getSchedulableCores(nodes []v1.Node) int64 { | ||||
| 	var sc resource.Quantity | ||||
| 	for _, node := range nodes { | ||||
| 		if !node.Spec.Unschedulable { | ||||
| 			sc.Add(node.Status.Allocatable[v1.ResourceCPU]) | ||||
| 		} | ||||
| 	} | ||||
| 	return sc.Value() | ||||
| } | ||||
|  | ||||
| func detectDNSProvider(ctx context.Context, c clientset.Interface) (string, error) { | ||||
| 	cm, err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Get(ctx, "coredns-autoscaler", metav1.GetOptions{}) | ||||
| 	if cm != nil && err == nil { | ||||
| 		return "coredns", nil | ||||
| 	} | ||||
|  | ||||
| 	cm, err = c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Get(ctx, "kube-dns-autoscaler", metav1.GetOptions{}) | ||||
| 	if cm != nil && err == nil { | ||||
| 		return "kube-dns", nil | ||||
| 	} | ||||
|  | ||||
| 	return "", fmt.Errorf("the cluster doesn't have kube-dns or coredns autoscaling configured") | ||||
| } | ||||
|  | ||||
| func fetchDNSScalingConfigMap(ctx context.Context, c clientset.Interface, configMapName string) (*v1.ConfigMap, error) { | ||||
| 	cm, err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Get(ctx, configMapName, metav1.GetOptions{}) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	return cm, nil | ||||
| } | ||||
|  | ||||
| func deleteDNSScalingConfigMap(ctx context.Context, c clientset.Interface, configMapName string) error { | ||||
| 	if err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Delete(ctx, configMapName, metav1.DeleteOptions{}); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	framework.Logf("DNS autoscaling ConfigMap deleted.") | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func packLinearParams(params *DNSParamsLinear) map[string]string { | ||||
| 	paramsMap := make(map[string]string) | ||||
| 	paramsMap["linear"] = fmt.Sprintf("{\"nodesPerReplica\": %v,\"coresPerReplica\": %v,\"min\": %v,\"max\": %v}", | ||||
| 		params.nodesPerReplica, | ||||
| 		params.coresPerReplica, | ||||
| 		params.min, | ||||
| 		params.max) | ||||
| 	return paramsMap | ||||
| } | ||||
|  | ||||
| func packDNSScalingConfigMap(configMapName string, params map[string]string) *v1.ConfigMap { | ||||
| 	configMap := v1.ConfigMap{} | ||||
| 	configMap.ObjectMeta.Name = configMapName | ||||
| 	configMap.ObjectMeta.Namespace = metav1.NamespaceSystem | ||||
| 	configMap.Data = params | ||||
| 	return &configMap | ||||
| } | ||||
|  | ||||
| func updateDNSScalingConfigMap(ctx context.Context, c clientset.Interface, configMap *v1.ConfigMap) error { | ||||
| 	_, err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Update(ctx, configMap, metav1.UpdateOptions{}) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	framework.Logf("DNS autoscaling ConfigMap updated.") | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func getDNSReplicas(ctx context.Context, c clientset.Interface) (int, error) { | ||||
| 	label := labels.SelectorFromSet(labels.Set(map[string]string{ClusterAddonLabelKey: DNSLabelName})) | ||||
| 	listOpts := metav1.ListOptions{LabelSelector: label.String()} | ||||
| 	deployments, err := c.AppsV1().Deployments(metav1.NamespaceSystem).List(ctx, listOpts) | ||||
| 	if err != nil { | ||||
| 		return 0, err | ||||
| 	} | ||||
| 	if len(deployments.Items) != 1 { | ||||
| 		return 0, fmt.Errorf("expected 1 DNS deployment, got %v", len(deployments.Items)) | ||||
| 	} | ||||
|  | ||||
| 	deployment := deployments.Items[0] | ||||
| 	return int(*(deployment.Spec.Replicas)), nil | ||||
| } | ||||
|  | ||||
| func deleteDNSAutoscalerPod(ctx context.Context, c clientset.Interface) error { | ||||
| 	selector, _ := labels.Parse(fmt.Sprintf("%s in (kube-dns-autoscaler, coredns-autoscaler)", ClusterAddonLabelKey)) | ||||
| 	listOpts := metav1.ListOptions{LabelSelector: selector.String()} | ||||
| 	pods, err := c.CoreV1().Pods(metav1.NamespaceSystem).List(ctx, listOpts) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if len(pods.Items) != 1 { | ||||
| 		return fmt.Errorf("expected 1 autoscaler pod, got %v", len(pods.Items)) | ||||
| 	} | ||||
|  | ||||
| 	podName := pods.Items[0].Name | ||||
| 	if err := c.CoreV1().Pods(metav1.NamespaceSystem).Delete(ctx, podName, metav1.DeleteOptions{}); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	framework.Logf("DNS autoscaling pod %v deleted.", podName) | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func waitForDNSReplicasSatisfied(ctx context.Context, c clientset.Interface, getExpected getExpectReplicasFunc, timeout time.Duration) (err error) { | ||||
| 	var current int | ||||
| 	var expected int | ||||
| 	framework.Logf("Waiting up to %v for kube-dns to reach expected replicas", timeout) | ||||
| 	condition := func(ctx context.Context) (bool, error) { | ||||
| 		current, err = getDNSReplicas(ctx, c) | ||||
| 		if err != nil { | ||||
| 			return false, err | ||||
| 		} | ||||
| 		expected = getExpected(c) | ||||
| 		if current != expected { | ||||
| 			framework.Logf("Replicas not as expected: got %v, expected %v", current, expected) | ||||
| 			return false, nil | ||||
| 		} | ||||
| 		return true, nil | ||||
| 	} | ||||
|  | ||||
| 	if err = wait.PollUntilContextTimeout(ctx, 2*time.Second, timeout, false, condition); err != nil { | ||||
| 		return fmt.Errorf("err waiting for DNS replicas to satisfy %v, got %v: %w", expected, current, err) | ||||
| 	} | ||||
| 	framework.Logf("kube-dns reaches expected replicas: %v", expected) | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func waitForDNSConfigMapCreated(ctx context.Context, c clientset.Interface, timeout time.Duration, configMapName string) (configMap *v1.ConfigMap, err error) { | ||||
| 	framework.Logf("Waiting up to %v for DNS autoscaling ConfigMap to be re-created", timeout) | ||||
| 	condition := func(ctx context.Context) (bool, error) { | ||||
| 		configMap, err = fetchDNSScalingConfigMap(ctx, c, configMapName) | ||||
| 		if err != nil { | ||||
| 			return false, nil | ||||
| 		} | ||||
| 		return true, nil | ||||
| 	} | ||||
|  | ||||
| 	if err = wait.PollUntilContextTimeout(ctx, time.Second, timeout, false, condition); err != nil { | ||||
| 		return nil, fmt.Errorf("err waiting for DNS autoscaling ConfigMap to be re-created: %w", err) | ||||
| 	} | ||||
| 	return configMap, nil | ||||
| } | ||||
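The expected replica count used throughout these DNS tests follows the linear formula implemented by the removed getExpectReplicasFuncLinear: replicas = max(1, max(ceil(nodes / nodesPerReplica), ceil(schedulableCores / coresPerReplica))). A small standalone sketch with assumed cluster numbers (10 nodes and 40 schedulable cores are hypothetical; the 3 / 3 parameters mirror DNSParams3 above):

package main

import (
	"fmt"
	"math"
)

func main() {
	// Assumed cluster shape (hypothetical values for illustration).
	nodes, schedulableCores := 10.0, 40.0

	// Parameters in the same shape as the "linear" ConfigMap entry built by
	// packLinearParams, e.g. {"nodesPerReplica": 3, "coresPerReplica": 3, ...}.
	nodesPerReplica, coresPerReplica := 3.0, 3.0

	replicasFromNodes := math.Ceil(nodes / nodesPerReplica)            // 4
	replicasFromCores := math.Ceil(schedulableCores / coresPerReplica) // 14
	replicas := int(math.Max(1.0, math.Max(replicasFromNodes, replicasFromCores)))

	fmt.Println(replicas) // 14
}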
| @@ -20,9 +20,4 @@ const ( | ||||
| 	// NVIDIAGPUResourceName is the extended name of the GPU resource since v1.8 | ||||
| 	// this uses the device plugin mechanism | ||||
| 	NVIDIAGPUResourceName = "nvidia.com/gpu" | ||||
|  | ||||
| 	// GPUDevicePluginDSYAML is the official Google Device Plugin Daemonset NVIDIA GPU manifest for GKE | ||||
| 	// TODO: Parametrize it by making it a feature in TestFramework. | ||||
| 	// so we can override the daemonset in other setups (non COS). | ||||
| 	GPUDevicePluginDSYAML = "https://raw.githubusercontent.com/kubernetes/kubernetes/master/cluster/addons/device-plugins/nvidia-gpu/daemonset.yaml" | ||||
| ) | ||||
|   | ||||
| @@ -32,7 +32,6 @@ import ( | ||||
| 	internalapi "k8s.io/cri-api/pkg/apis" | ||||
| 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" | ||||
| 	commontest "k8s.io/kubernetes/test/e2e/common" | ||||
| 	e2egpu "k8s.io/kubernetes/test/e2e/framework/gpu" | ||||
| 	e2emanifest "k8s.io/kubernetes/test/e2e/framework/manifest" | ||||
| 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod" | ||||
| 	e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles" | ||||
| @@ -83,11 +82,6 @@ func updateImageAllowList(ctx context.Context) { | ||||
| 	} else { | ||||
| 		e2epod.ImagePrePullList.Insert(sriovDevicePluginImage) | ||||
| 	} | ||||
| 	if gpuDevicePluginImage, err := getGPUDevicePluginImage(ctx); err != nil { | ||||
| 		klog.Errorln(err) | ||||
| 	} else { | ||||
| 		e2epod.ImagePrePullList.Insert(gpuDevicePluginImage) | ||||
| 	} | ||||
| 	if samplePluginImage, err := getContainerImageFromE2ETestDaemonset(SampleDevicePluginDSYAML); err != nil { | ||||
| 		klog.Errorln(err) | ||||
| 	} else { | ||||
| @@ -217,21 +211,6 @@ func PrePullAllImages() error { | ||||
| 	return utilerrors.NewAggregate(pullErrs) | ||||
| } | ||||
|  | ||||
| // getGPUDevicePluginImage returns the image of GPU device plugin. | ||||
| func getGPUDevicePluginImage(ctx context.Context) (string, error) { | ||||
| 	ds, err := e2emanifest.DaemonSetFromURL(ctx, e2egpu.GPUDevicePluginDSYAML) | ||||
| 	if err != nil { | ||||
| 		return "", fmt.Errorf("failed to parse the device plugin image: %w", err) | ||||
| 	} | ||||
| 	if ds == nil { | ||||
| 		return "", fmt.Errorf("failed to parse the device plugin image: the extracted DaemonSet is nil") | ||||
| 	} | ||||
| 	if len(ds.Spec.Template.Spec.Containers) < 1 { | ||||
| 		return "", fmt.Errorf("failed to parse the device plugin image: cannot extract the container from YAML") | ||||
| 	} | ||||
| 	return ds.Spec.Template.Spec.Containers[0].Image, nil | ||||
| } | ||||
|  | ||||
| func getContainerImageFromE2ETestDaemonset(dsYamlPath string) (string, error) { | ||||
| 	data, err := e2etestfiles.Read(dsYamlPath) | ||||
| 	if err != nil { | ||||
|   | ||||
| @@ -1,27 +0,0 @@ | ||||
| #cloud-config | ||||
|  | ||||
| runcmd: | ||||
|   - modprobe configs | ||||
|   # Install GPU drivers - https://cloud.google.com/container-optimized-os/docs/how-to/run-gpus | ||||
|   - cos-extensions install gpu | ||||
|   - mount --bind /var/lib/nvidia /var/lib/nvidia | ||||
|   - mount -o remount,exec /var/lib/nvidia /var/lib/nvidia | ||||
|   # Run nvidia-smi to verify installation | ||||
|   - /var/lib/nvidia/bin/nvidia-smi | ||||
|   # Remove build containers. They're very large. | ||||
|   - docker rm -f $(docker ps -aq) | ||||
|   # Standard installation proceeds | ||||
|   - mount /tmp /tmp -o remount,exec,suid | ||||
|   - usermod -a -G docker jenkins | ||||
|   - mkdir -p /var/lib/kubelet | ||||
|   - mkdir -p /home/kubernetes/containerized_mounter/rootfs | ||||
|   - mount --bind /home/kubernetes/containerized_mounter/ /home/kubernetes/containerized_mounter/ | ||||
|   - mount -o remount, exec /home/kubernetes/containerized_mounter/ | ||||
|   - wget https://storage.googleapis.com/kubernetes-release/gci-mounter/mounter.tar -O /tmp/mounter.tar | ||||
|   - tar xvf /tmp/mounter.tar -C /home/kubernetes/containerized_mounter/rootfs | ||||
|   - mkdir -p /home/kubernetes/containerized_mounter/rootfs/var/lib/kubelet | ||||
|   - mount --rbind /var/lib/kubelet /home/kubernetes/containerized_mounter/rootfs/var/lib/kubelet | ||||
|   - mount --make-rshared /home/kubernetes/containerized_mounter/rootfs/var/lib/kubelet | ||||
|   - mount --bind /proc /home/kubernetes/containerized_mounter/rootfs/proc | ||||
|   - mount --bind /dev /home/kubernetes/containerized_mounter/rootfs/dev | ||||
|   - rm /tmp/mounter.tar | ||||