Mirror of https://github.com/optim-enterprises-bv/kubernetes.git, synced 2025-11-04 04:08:16 +00:00
Scheduler: introduce CheckNodeMemoryPressurePredicate; do not schedule pods on nodes that report memory pressure.
Introduce a unit test for CheckNodeMemoryPressurePredicate, following the work done in #14943.
@@ -48,6 +48,7 @@ The purpose of filtering the nodes is to filter out the nodes that do not meet c
 - `MatchNodeSelector`: Check if the labels of the node match the labels specified in the Pod's `nodeSelector` field and, as of Kubernetes v1.2, also match the `scheduler.alpha.kubernetes.io/affinity` pod annotation if present. See [here](../user-guide/node-selection/) for more details on both.
 - `MaxEBSVolumeCount`: Ensure that the number of attached ElasticBlockStore volumes does not exceed a maximum value (by default, 39, since Amazon recommends a maximum of 40 with one of those 40 reserved for the root volume -- see [Amazon's documentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/volume_limits.html#linux-specific-volume-limits)). The maximum value can be controlled by setting the `KUBE_MAX_PD_VOLS` environment variable.
 - `MaxGCEPDVolumeCount`: Ensure that the number of attached GCE PersistentDisk volumes does not exceed a maximum value (by default, 16, which is the maximum GCE allows -- see [GCE's documentation](https://cloud.google.com/compute/docs/disks/persistent-disks#limits_for_predefined_machine_types)). The maximum value can be controlled by setting the `KUBE_MAX_PD_VOLS` environment variable.
+- `CheckNodeMemoryPressure`: Check if a pod can be scheduled on a node reporting the memory pressure condition. Currently, no `BestEffort` pod should be placed on a node under memory pressure, as it would be automatically evicted by the kubelet.
 
 The details of the above predicates can be found in [plugin/pkg/scheduler/algorithm/predicates/predicates.go](http://releases.k8s.io/HEAD/plugin/pkg/scheduler/algorithm/predicates/predicates.go). All predicates mentioned above can be used in combination to perform a sophisticated filtering policy. Kubernetes uses some, but not all, of these predicates by default. You can see which ones are used by default in [plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go](http://releases.k8s.io/HEAD/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go).
 
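To make the "used in combination" point concrete, here is a minimal sketch of a predicate-driven filtering pass. Everything in it (FitPredicate, filterNodes, the toy Pod and Node types) is an illustrative stand-in, not the scheduler's actual API; the real predicate signature, visible in the diff below, takes an *api.Pod and a *schedulercache.NodeInfo.

package main

import "fmt"

type Pod struct{ Name string }

type Node struct {
	Name           string
	MemoryPressure bool
}

// FitPredicate reports whether pod fits on node; a false result may carry a
// typed reason, mirroring the predicate-failure errors in predicates.go.
type FitPredicate func(pod *Pod, node *Node) (bool, error)

// filterNodes keeps only the nodes for which every predicate passes.
func filterNodes(pod *Pod, nodes []*Node, preds []FitPredicate) []*Node {
	var fits []*Node
	for _, node := range nodes {
		ok := true
		for _, pred := range preds {
			if fit, _ := pred(pod, node); !fit {
				ok = false
				break
			}
		}
		if ok {
			fits = append(fits, node)
		}
	}
	return fits
}

func main() {
	// A toy predicate with the same shape as CheckNodeMemoryPressure.
	checkMemoryPressure := func(pod *Pod, node *Node) (bool, error) {
		if node.MemoryPressure {
			return false, fmt.Errorf("node %s is under memory pressure", node.Name)
		}
		return true, nil
	}
	nodes := []*Node{{Name: "node-a"}, {Name: "node-b", MemoryPressure: true}}
	for _, n := range filterNodes(&Pod{Name: "pod"}, nodes, []FitPredicate{checkMemoryPressure}) {
		fmt.Println(n.Name) // only node-a survives the filtering pass
	}
}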
@@ -38,6 +38,7 @@ var (
 	ErrNodeLabelPresenceViolated = newPredicateFailureError("CheckNodeLabelPresence")
 	ErrServiceAffinityViolated   = newPredicateFailureError("CheckServiceAffinity")
 	ErrMaxVolumeCountExceeded    = newPredicateFailureError("MaxVolumeCount")
+	ErrNodeUnderMemoryPressure   = newPredicateFailureError("NodeUnderMemoryPressure")
 	// ErrFakePredicate is used for tests only. The fake predicates returning false also return an error
 	// as ErrFakePredicate.
 	ErrFakePredicate = newPredicateFailureError("FakePredicateError")
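The new ErrNodeUnderMemoryPressure follows the sentinel-error pattern used by the other predicate failures: callers compare the returned error against the shared value to tell an expected rejection apart from an unexpected failure, which is exactly what the unit test further down does. A self-contained sketch of the pattern, with errors.New standing in for newPredicateFailureError:

package main

import (
	"errors"
	"fmt"
)

// Stand-in for the shared sentinel value created by newPredicateFailureError.
var ErrNodeUnderMemoryPressure = errors.New("NodeUnderMemoryPressure")

// check returns the sentinel when it rejects, so callers can recognize an
// expected rejection with a plain equality test.
func check(underPressure bool) (bool, error) {
	if underPressure {
		return false, ErrNodeUnderMemoryPressure
	}
	return true, nil
}

func main() {
	if _, err := check(true); err == ErrNodeUnderMemoryPressure {
		fmt.Println("rejected for a known, expected reason")
	}
}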
@@ -23,6 +23,7 @@ import (
 	"k8s.io/kubernetes/pkg/api"
 	"k8s.io/kubernetes/pkg/api/unversioned"
 	"k8s.io/kubernetes/pkg/client/cache"
+	qosutil "k8s.io/kubernetes/pkg/kubelet/qos/util"
 	"k8s.io/kubernetes/pkg/labels"
 	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
 	priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
@@ -999,3 +1000,31 @@ func tolerationsToleratesTaints(tolerations []api.Toleration, taints []api.Taint
 
 	return true
 }
+
+// Determine if a pod is scheduled with best-effort QoS
+func isPodBestEffort(pod *api.Pod) bool {
+	return qosutil.GetPodQos(pod) == qosutil.BestEffort
+}
+
+// CheckNodeMemoryPressurePredicate checks if a pod can be scheduled on a node
+// reporting memory pressure condition.
+func CheckNodeMemoryPressurePredicate(pod *api.Pod, nodeInfo *schedulercache.NodeInfo) (bool, error) {
+	node := nodeInfo.Node()
+	if node == nil {
+		return false, fmt.Errorf("node not found")
+	}
+
+	// non-BestEffort pods are never rejected by this predicate
+	if !isPodBestEffort(pod) {
+		return true, nil
+	}
+
+	// is the node under memory pressure?
+	for _, cond := range node.Status.Conditions {
+		if cond.Type == api.NodeMemoryPressure && cond.Status == api.ConditionTrue {
+			return false, ErrNodeUnderMemoryPressure
+		}
+	}
+
+	return true, nil
+}
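The predicate delegates the QoS decision to qosutil.GetPodQos. The rule it relies on is, roughly, that a pod is BestEffort when none of its containers specify any resource requests or limits. Here is a standalone sketch of that rule, using simplified stand-in types rather than the real api structs:

package main

import "fmt"

// Simplified stand-ins for the api package types.
type ResourceList map[string]int64

type Container struct {
	Requests ResourceList
	Limits   ResourceList
}

type Pod struct{ Containers []Container }

// isBestEffort mirrors the rule isPodBestEffort relies on: a pod is BestEffort
// when no container specifies any resource requests or limits.
func isBestEffort(pod *Pod) bool {
	for _, c := range pod.Containers {
		if len(c.Requests) > 0 || len(c.Limits) > 0 {
			return false
		}
	}
	return true
}

func main() {
	empty := &Pod{Containers: []Container{{}}}
	burstable := &Pod{Containers: []Container{{Requests: ResourceList{"cpu": 100}}}}
	fmt.Println(isBestEffort(empty), isBestEffort(burstable)) // true false
}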
@@ -2641,3 +2641,111 @@ func TestPodToleratesTaints(t *testing.T) {
 		}
 	}
 }
+
+func makeEmptyNodeInfo(node *api.Node) *schedulercache.NodeInfo {
+	nodeInfo := schedulercache.NewNodeInfo()
+	nodeInfo.SetNode(node)
+	return nodeInfo
+}
+
+func TestPodSchedulesOnNodeWithMemoryPressureCondition(t *testing.T) {
+	// specify a best-effort pod
+	bestEffortPod := &api.Pod{
+		Spec: api.PodSpec{
+			Containers: []api.Container{
+				{
+					Name:            "container",
+					Image:           "image",
+					ImagePullPolicy: "Always",
+					// no requirements -> best-effort pod
+					Resources: api.ResourceRequirements{},
+				},
+			},
+		},
+	}
+
+	// specify a non-best-effort pod
+	nonBestEffortPod := &api.Pod{
+		Spec: api.PodSpec{
+			Containers: []api.Container{
+				{
+					Name:            "container",
+					Image:           "image",
+					ImagePullPolicy: "Always",
+					// at least one requirement -> burstable pod
+					Resources: api.ResourceRequirements{
+						Requests: makeAllocatableResources(100, 100, 100, 100),
+					},
+				},
+			},
+		},
+	}
+
+	// specify a node with no memory pressure condition
+	noMemoryPressureNode := &api.Node{
+		Status: api.NodeStatus{
+			Conditions: []api.NodeCondition{
+				{
+					Type:   "Ready",
+					Status: "True",
+				},
+			},
+		},
+	}
+
+	// specify a node with the memory pressure condition set
+	memoryPressureNode := &api.Node{
+		Status: api.NodeStatus{
+			Conditions: []api.NodeCondition{
+				{
+					Type:   "MemoryPressure",
+					Status: "True",
+				},
+			},
+		},
+	}
+
+	tests := []struct {
+		pod      *api.Pod
+		nodeInfo *schedulercache.NodeInfo
+		fits     bool
+		name     string
+	}{
+		{
+			pod:      bestEffortPod,
+			nodeInfo: makeEmptyNodeInfo(noMemoryPressureNode),
+			fits:     true,
+			name:     "best-effort pod schedulable on node without memory pressure condition on",
+		},
+		{
+			pod:      bestEffortPod,
+			nodeInfo: makeEmptyNodeInfo(memoryPressureNode),
+			fits:     false,
+			name:     "best-effort pod not schedulable on node with memory pressure condition on",
+		},
+		{
+			pod:      nonBestEffortPod,
+			nodeInfo: makeEmptyNodeInfo(memoryPressureNode),
+			fits:     true,
+			name:     "non best-effort pod schedulable on node with memory pressure condition on",
+		},
+		{
+			pod:      nonBestEffortPod,
+			nodeInfo: makeEmptyNodeInfo(noMemoryPressureNode),
+			fits:     true,
+			name:     "non best-effort pod schedulable on node without memory pressure condition on",
+		},
+	}
+
+	for _, test := range tests {
+		fits, err := CheckNodeMemoryPressurePredicate(test.pod, test.nodeInfo)
+		if fits != test.fits {
+			t.Errorf("%s: expected %v got %v", test.name, test.fits, fits)
+		}
+
+		if err != nil && err != ErrNodeUnderMemoryPressure {
+			t.Errorf("%s: unexpected error: %v", test.name, err)
+			continue
+		}
+	}
+}
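One edge case the table does not cover is a node that reports the MemoryPressure condition with status False. Because the predicate rejects only when the condition's status is api.ConditionTrue, such a node still accepts BestEffort pods. A standalone sketch of that condition-scanning rule, again with simplified stand-in types:

package main

import "fmt"

// Simplified stand-in for api.NodeCondition.
type Condition struct{ Type, Status string }

// underMemoryPressure applies the same rule as the predicate's loop: only a
// MemoryPressure condition whose status is True counts.
func underMemoryPressure(conds []Condition) bool {
	for _, c := range conds {
		if c.Type == "MemoryPressure" && c.Status == "True" {
			return true
		}
	}
	return false
}

func main() {
	fmt.Println(underMemoryPressure([]Condition{{Type: "Ready", Status: "True"}}))           // false
	fmt.Println(underMemoryPressure([]Condition{{Type: "MemoryPressure", Status: "False"}})) // false
	fmt.Println(underMemoryPressure([]Condition{{Type: "MemoryPressure", Status: "True"}}))  // true
}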
@@ -153,6 +153,9 @@ func defaultPredicates() sets.String {
 				return predicates.NewTolerationMatchPredicate(args.NodeInfo)
 			},
 		),
+
+		// Fit is determined by node memory pressure condition.
+		factory.RegisterFitPredicate("CheckNodeMemoryPressure", predicates.CheckNodeMemoryPressurePredicate),
 	)
 }
 
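Registration is what ties the new predicate into the default algorithm provider: RegisterFitPredicate stores the function under the name "CheckNodeMemoryPressure" and returns that name for the defaultPredicates set. A minimal sketch of the pattern, assuming a plain name-to-function map (the real factory does more wiring than this, such as building named algorithm providers):

package main

import "fmt"

// FitPredicate is a simplified stand-in for the scheduler's predicate type.
type FitPredicate func(pod, node string) (bool, error)

// fitPredicateMap plays the role of the factory's internal registry.
var fitPredicateMap = map[string]FitPredicate{}

// registerFitPredicate records the predicate under its name and returns the
// name, which a defaultPredicates-style function can collect into a set.
func registerFitPredicate(name string, pred FitPredicate) string {
	fitPredicateMap[name] = pred
	return name
}

func main() {
	name := registerFitPredicate("CheckNodeMemoryPressure",
		func(pod, node string) (bool, error) { return true, nil })
	fmt.Println(name, len(fitPredicateMap)) // CheckNodeMemoryPressure 1
}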