	Merge pull request #43453 from MaciekPytel/ca_more_drain_e2e
Automatic merge from submit-queue (batch tested with PRs 43453, 42992)

Cluster-autoscaler multistep node drain e2e

**What this PR does / why we need it**: This adds another Cluster-Autoscaler node drain e2e test. It drains a node when a PodDisruptionBudget (pdb) allows moving some of the required pods, but not all of them in one go. It is a more complex scenario than the existing CA drain e2e test and exercises the retry logic in CA scale-down.

**Which issue this PR fixes**: fixes #

**Special notes for your reviewer**:

**Release note**:
```release-note
```
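The new test case hinges on a PodDisruptionBudget that selects all of the reschedulable pods but leaves room to evict only a few of them at a time, so the autoscaler has to drain a node over several iterations. As a rough sketch only (not code from this PR; the helper name `drainPDB`, the PDB name `test_pdb`, and the use of the current `k8s.io/api` packages are assumptions), such a budget could be built like this:

```go
// drainPDB builds a PodDisruptionBudget over numPods pods (matched via labelMap,
// e.g. the test's "test_id" label) that allows at most pdbSize of them to be
// evicted at any one time, i.e. MinAvailable = numPods - pdbSize.
// Sketch only -- names and import paths are illustrative assumptions.
package sketch

import (
	policyv1beta1 "k8s.io/api/policy/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
)

func drainPDB(namespace string, labelMap map[string]string, numPods, pdbSize int) *policyv1beta1.PodDisruptionBudget {
	minAvailable := intstr.FromInt(numPods - pdbSize)
	return &policyv1beta1.PodDisruptionBudget{
		ObjectMeta: metav1.ObjectMeta{Name: "test_pdb", Namespace: namespace},
		Spec: policyv1beta1.PodDisruptionBudgetSpec{
			Selector:     &metav1.LabelSelector{MatchLabels: labelMap},
			MinAvailable: &minAvailable,
		},
	}
}
```

With the new third test case below (two pods per node, pdbSize = 1), only one pod can be evicted per attempt, so removing a node takes several drain passes, which is exactly the scale-down retry path this PR wants covered.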
@@ -330,7 +330,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaling [Slow]", func() {
 	})
 
 	It("should be able to scale down when rescheduling a pod is required and pdb allows for it[Feature:ClusterSizeAutoscalingScaleDown]", func() {
-		runDrainTest(f, originalSizes, 1, func(increasedSize int) {
+		runDrainTest(f, originalSizes, 1, 1, func(increasedSize int) {
 			By("Some node should be removed")
 			framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
 				func(size int) bool { return size < increasedSize }, scaleDownTimeout))
@@ -338,7 +338,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaling [Slow]", func() {
 	})
 
 	It("shouldn't be able to scale down when rescheduling a pod is required, but pdb doesn't allow drain[Feature:ClusterSizeAutoscalingScaleDown]", func() {
-		runDrainTest(f, originalSizes, 0, func(increasedSize int) {
+		runDrainTest(f, originalSizes, 1, 0, func(increasedSize int) {
 			By("No nodes should be removed")
 			time.Sleep(scaleDownTimeout)
 			nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
@@ -346,9 +346,17 @@ var _ = framework.KubeDescribe("Cluster size autoscaling [Slow]", func() {
 		})
 	})
 
+	It("should be able to scale down by draining multiple pods one by one as dictated by pdb[Feature:ClusterSizeAutoscalingScaleDown]", func() {
+		runDrainTest(f, originalSizes, 2, 1, func(increasedSize int) {
+			By("Some node should be removed")
+			framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
+				func(size int) bool { return size < increasedSize }, scaleDownTimeout))
+		})
+	})
+
 })
 
-func runDrainTest(f *framework.Framework, migSizes map[string]int, pdbSize int, verifyFunction func(int)) {
+func runDrainTest(f *framework.Framework, migSizes map[string]int, podsPerNode, pdbSize int, verifyFunction func(int)) {
 	increasedSize := manuallyIncreaseClusterSize(f, migSizes)
 
 	nodes, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
@@ -356,10 +364,10 @@ func runDrainTest(f *framework.Framework, migSizes map[string]int, pdbSize int,
 	}.AsSelector().String()})
 	framework.ExpectNoError(err)
 	namespace := f.Namespace.Name
-	numPods := len(nodes.Items)
+	numPods := len(nodes.Items) * podsPerNode
 	testId := string(uuid.NewUUID()) // So that we can label and find pods
 	labelMap := map[string]string{"test_id": testId}
-	framework.ExpectNoError(runReplicatedPodOnEachNode(f, nodes.Items, "reschedulable-pods", labelMap))
+	framework.ExpectNoError(runReplicatedPodOnEachNode(f, nodes.Items, podsPerNode, "reschedulable-pods", labelMap))
 
 	defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, f.Namespace.Name, "reschedulable-pods")
 
@@ -666,55 +674,73 @@ func setMigSizes(sizes map[string]int) {
 
 func makeNodeUnschedulable(c clientset.Interface, node *v1.Node) error {
 	By(fmt.Sprintf("Taint node %s", node.Name))
-	freshNode, err := c.Core().Nodes().Get(node.Name, metav1.GetOptions{})
-	if err != nil {
-		return err
-	}
-	for _, taint := range freshNode.Spec.Taints {
-		if taint.Key == disabledTaint {
+	for j := 0; j < 3; j++ {
+		freshNode, err := c.Core().Nodes().Get(node.Name, metav1.GetOptions{})
+		if err != nil {
+			return err
+		}
+		for _, taint := range freshNode.Spec.Taints {
+			if taint.Key == disabledTaint {
+				return nil
+			}
+		}
+		freshNode.Spec.Taints = append(freshNode.Spec.Taints, v1.Taint{
+			Key:    disabledTaint,
+			Value:  "DisabledForTest",
+			Effect: v1.TaintEffectNoSchedule,
+		})
+		_, err = c.Core().Nodes().Update(freshNode)
+		if err == nil {
 			return nil
 		}
+		if !errors.IsConflict(err) {
+			return err
+		}
+		glog.Warningf("Got 409 conflict when trying to taint node, retries left: %v", 3-j)
 	}
-	freshNode.Spec.Taints = append(freshNode.Spec.Taints, v1.Taint{
-		Key:    disabledTaint,
-		Value:  "DisabledForTest",
-		Effect: v1.TaintEffectNoSchedule,
-	})
-	_, err = c.Core().Nodes().Update(freshNode)
-	return err
+	return fmt.Errorf("Failed to taint node in allowed number of retries")
 }
 
 func makeNodeSchedulable(c clientset.Interface, node *v1.Node) error {
 	By(fmt.Sprintf("Remove taint from node %s", node.Name))
-	freshNode, err := c.Core().Nodes().Get(node.Name, metav1.GetOptions{})
-	if err != nil {
-		return err
-	}
-	newTaints := make([]v1.Taint, 0)
-	for _, taint := range freshNode.Spec.Taints {
-		if taint.Key != disabledTaint {
-			newTaints = append(newTaints, taint)
+	for j := 0; j < 3; j++ {
+		freshNode, err := c.Core().Nodes().Get(node.Name, metav1.GetOptions{})
+		if err != nil {
+			return err
+		}
+		newTaints := make([]v1.Taint, 0)
+		for _, taint := range freshNode.Spec.Taints {
+			if taint.Key != disabledTaint {
+				newTaints = append(newTaints, taint)
+			}
		}
-	}
 
-	if len(newTaints) != len(freshNode.Spec.Taints) {
+		if len(newTaints) == len(freshNode.Spec.Taints) {
+			return nil
+		}
 		freshNode.Spec.Taints = newTaints
 		_, err = c.Core().Nodes().Update(freshNode)
-		return err
+		if err == nil {
+			return nil
+		}
+		if !errors.IsConflict(err) {
+			return err
+		}
+		glog.Warningf("Got 409 conflict when trying to taint node, retries left: %v", 3-j)
 	}
-	return nil
+	return fmt.Errorf("Failed to remove taint from node in allowed number of retries")
 }
 
-// Creat an RC running a single pod on each node without adding any constraint forcing such
-// pod distribution. This is meant to create a bunch of underutilized (but not unused) nodes
+// Creat an RC running a given number of pods on each node without adding any constraint forcing
+// such pod distribution. This is meant to create a bunch of underutilized (but not unused) nodes
 // with pods that can be rescheduled on different nodes.
 // This is achieved using the following method:
 // 1. disable scheduling on each node
 // 2. create an empty RC
 // 3. for each node:
 // 3a. enable scheduling on that node
-// 3b. increase number of replicas in RC by 1
-func runReplicatedPodOnEachNode(f *framework.Framework, nodes []v1.Node, id string, labels map[string]string) error {
+// 3b. increase number of replicas in RC by podsPerNode
+func runReplicatedPodOnEachNode(f *framework.Framework, nodes []v1.Node, podsPerNode int, id string, labels map[string]string) error {
 	By("Run a pod on each node")
 	for _, node := range nodes {
 		err := makeNodeUnschedulable(f.ClientSet, &node)
@@ -754,7 +780,7 @@ func runReplicatedPodOnEachNode(f *framework.Framework, nodes []v1.Node, id stri
 		// Update replicas count, to create new pods that will be allocated on node
 		// (we retry 409 errors in case rc reference got out of sync)
 		for j := 0; j < 3; j++ {
-			*rc.Spec.Replicas = int32(i + 1)
+			*rc.Spec.Replicas = int32((i + 1) * podsPerNode)
 			rc, err = f.ClientSet.Core().ReplicationControllers(f.Namespace.Name).Update(rc)
 			if err == nil {
 				break
@@ -771,7 +797,7 @@ func runReplicatedPodOnEachNode(f *framework.Framework, nodes []v1.Node, id stri
 
 		err = wait.PollImmediate(5*time.Second, podTimeout, func() (bool, error) {
 			rc, err = f.ClientSet.Core().ReplicationControllers(f.Namespace.Name).Get(id, metav1.GetOptions{})
-			if err != nil || rc.Status.ReadyReplicas < int32(i+1) {
+			if err != nil || rc.Status.ReadyReplicas < int32((i+1)*podsPerNode) {
 				return false, nil
 			}
 			return true, nil
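The comment on `runReplicatedPodOnEachNode` above describes the overall scheme (taint every node, create an empty RC, then untaint nodes one by one while growing the RC by `podsPerNode`), but the hunks only show fragments of the body. Below is a compressed sketch of that flow, assuming the helpers and identifiers already present in this file (`makeNodeUnschedulable`, `makeNodeSchedulable`, `framework`, `v1`); `createEmptyRC` is a hypothetical stand-in for however the zero-replica RC named `id` with labels `labels` gets created, and the readiness wait and 409-retry handling shown in the diff are omitted:

```go
// Sketch, not the verbatim test helper: grow the RC one node at a time so each
// freshly untainted node picks up exactly podsPerNode pods.
func runReplicatedPodOnEachNodeSketch(f *framework.Framework, nodes []v1.Node, podsPerNode int, id string, labels map[string]string) error {
	// 1. Disable scheduling everywhere so no new pod can land before we want it to.
	for i := range nodes {
		if err := makeNodeUnschedulable(f.ClientSet, &nodes[i]); err != nil {
			return err
		}
	}
	// 2. Create the RC with zero replicas (createEmptyRC is a hypothetical stand-in).
	rc, err := createEmptyRC(f, id, labels)
	if err != nil {
		return err
	}
	// 3. Per node: re-enable scheduling, bump replicas by podsPerNode, and only
	//    move on once the new pods are ready on that node.
	for i := range nodes {
		if err := makeNodeSchedulable(f.ClientSet, &nodes[i]); err != nil {
			return err
		}
		*rc.Spec.Replicas = int32((i + 1) * podsPerNode)
		if rc, err = f.ClientSet.Core().ReplicationControllers(f.Namespace.Name).Update(rc); err != nil {
			return err
		}
		// ...poll until rc.Status.ReadyReplicas >= int32((i+1)*podsPerNode), as in the diff above.
	}
	return nil
}
```

Because the taint is lifted only right before each replica bump, the scheduler has nowhere else to place the new pods, which is how exactly `podsPerNode` pods end up on every node without any affinity constraints.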