move predicates into library (address #12744)

DONE:
1. refactor all predicates: each predicate returns fitOrNot (bool) and an error, where the error is of type
	PredicateFailureError or InsufficientResourceError. (A violation of either MaxEBSVolumeCount or
        MaxGCEPDVolumeCount returns the same error, ErrMaxVolumeCountExceeded.)
2. GeneralPredicates() is a predicate function, which includes several other predicate functions (PodFitsResources,
        PodFitsHost, PodFitsHostPorts). It is registered as one of the predicates in DefaultAlgorithmProvider, and
        is also called in canAdmitPod() in Kubelet and should be called by other components (like rescheduler, etc)
        if necessary. See discussion in issue #12744
3. remove podNumber check from GeneralPredicates
4. HostName is now verified in Kubelet's canAdmitPod(). add TestHostNameConflicts in kubelet_test.go
5. add getNodeAnyWay() method in Kubelet to get node information in standaloneMode

TODO:
1. determine which predicates should be included in GeneralPredicates()
2. separate GeneralPredicates() into:
	a. GeneralPredicatesEvictPod() and
	b. GeneralPredicatesNotEvictPod()
3. DaemonSet should use GeneralPredicates()
This commit is contained in:
HaiyangDING
2016-01-06 09:10:59 +08:00
parent e31177219b
commit 41ed85479a
10 changed files with 475 additions and 92 deletions

View File

@@ -24,11 +24,27 @@ const (
memoryResoureceName string = "Memory"
)
// Predefined predicate failure errors. Each one is a PredicateFailureError
// carrying the name of the predicate that rejected the pod.
var (
// The predicate names below are kept consistent with the predicate names used in
// DefaultAlgorithmProvider (defined in defaults.go), which tend to stay stable for
// backward compatibility.
ErrDiskConflict = newPredicateFailureError("NoDiskConflict")
ErrVolumeZoneConflict = newPredicateFailureError("NoVolumeZoneConflict")
ErrNodeSelectorNotMatch = newPredicateFailureError("MatchNodeSelector")
ErrPodNotMatchHostName = newPredicateFailureError("HostName")
ErrPodNotFitsHostPorts = newPredicateFailureError("PodFitsHostPorts")
ErrNodeLabelPresenceViolated = newPredicateFailureError("CheckNodeLabelPresence")
ErrServiceAffinityViolated = newPredicateFailureError("CheckServiceAffinity")
// ErrMaxVolumeCountExceeded is the single error returned when a max-volume-count
// predicate (MaxEBSVolumeCount or MaxGCEPDVolumeCount) is violated.
ErrMaxVolumeCountExceeded = newPredicateFailureError("MaxVolumeCount")
// ErrFakePredicateError is used in tests only: fake predicates that return false
// also return this error.
ErrFakePredicateError = newPredicateFailureError("false")
)
// InsufficientResourceError is an error type that indicates what kind of resource limit is
// hit and caused the unfitting failure.
type InsufficientResourceError struct {
// resourceName is the name of the resource that is insufficient
resourceName string
ResourceName string
requested int64
used int64
capacity int64
@@ -36,7 +52,7 @@ type InsufficientResourceError struct {
func newInsufficientResourceError(resourceName string, requested, used, capacity int64) *InsufficientResourceError {
return &InsufficientResourceError{
resourceName: resourceName,
ResourceName: resourceName,
requested: requested,
used: used,
capacity: capacity,
@@ -45,5 +61,17 @@ func newInsufficientResourceError(resourceName string, requested, used, capacity
func (e *InsufficientResourceError) Error() string {
return fmt.Sprintf("Node didn't have enough resource: %s, requested: %d, used: %d, capacity: %d",
e.resourceName, e.requested, e.used, e.capacity)
e.ResourceName, e.requested, e.used, e.capacity)
}
// PredicateFailureError is the error reported when a named predicate rejects a
// pod.
type PredicateFailureError struct {
	// PredicateName is the name of the predicate that failed.
	PredicateName string
}

// newPredicateFailureError returns a PredicateFailureError whose PredicateName
// is predicateName.
func newPredicateFailureError(predicateName string) *PredicateFailureError {
	failure := new(PredicateFailureError)
	failure.PredicateName = predicateName
	return failure
}

// Error implements the error interface, rendering the failure as
// "Predicate <name> failed".
func (e *PredicateFailureError) Error() string {
	const format = "Predicate %s failed"
	return fmt.Sprintf(format, e.PredicateName)
}

View File

@@ -127,7 +127,7 @@ func NoDiskConflict(pod *api.Pod, nodeName string, nodeInfo *schedulercache.Node
for _, v := range pod.Spec.Volumes {
for _, ev := range nodeInfo.Pods() {
if isVolumeConflict(v, ev) {
return false, nil
return false, ErrDiskConflict
}
}
}
@@ -229,7 +229,8 @@ func (c *MaxPDVolumeCountChecker) predicate(pod *api.Pod, nodeName string, nodeI
numNewVolumes := len(newVolumes)
if numExistingVolumes+numNewVolumes > c.maxVolumes {
return false, nil
// violates MaxEBSVolumeCount or MaxGCEPDVolumeCount
return false, ErrMaxVolumeCountExceeded
}
return true, nil
@@ -362,7 +363,7 @@ func (c *VolumeZoneChecker) predicate(pod *api.Pod, nodeName string, nodeInfo *s
nodeV, _ := nodeConstraints[k]
if v != nodeV {
glog.V(2).Infof("Won't schedule pod %q onto node %q due to volume %q (mismatch on %q)", pod.Name, nodeName, pvName, k)
return false, nil
return false, ErrVolumeZoneConflict
}
}
}
@@ -421,20 +422,9 @@ func podName(pod *api.Pod) string {
return pod.Namespace + "/" + pod.Name
}
// PodFitsResources calculates fit based on requested, rather than used resources
func (r *ResourceFit) PodFitsResources(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo) (bool, error) {
info, err := r.info.GetNodeInfo(nodeName)
if err != nil {
return false, err
}
func podFitsResourcesInternal(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo, info *api.Node) (bool, error) {
allocatable := info.Status.Allocatable
allowedPodNumber := allocatable.Pods().Value()
if int64(len(nodeInfo.Pods()))+1 > allowedPodNumber {
return false,
newInsufficientResourceError(podCountResourceName, 1, int64(len(nodeInfo.Pods())), allowedPodNumber)
}
podRequest := getResourceRequest(pod)
if podRequest.milliCPU == 0 && podRequest.memory == 0 {
return true, nil
@@ -442,6 +432,7 @@ func (r *ResourceFit) PodFitsResources(pod *api.Pod, nodeName string, nodeInfo *
totalMilliCPU := allocatable.Cpu().MilliValue()
totalMemory := allocatable.Memory().Value()
if totalMilliCPU < podRequest.milliCPU+nodeInfo.RequestedResource().MilliCPU {
return false,
newInsufficientResourceError(cpuResourceName, podRequest.milliCPU, nodeInfo.RequestedResource().MilliCPU, totalMilliCPU)
@@ -455,15 +446,30 @@ func (r *ResourceFit) PodFitsResources(pod *api.Pod, nodeName string, nodeInfo *
return true, nil
}
// PodFitsResources looks up the node named nodeName, rejects the pod with an
// InsufficientResourceError when adding one more pod would exceed the node's
// allocatable pod count, and otherwise defers to podFitsResourcesInternal for
// the remaining resource checks. A failed node lookup is returned as-is.
func (r *NodeStatus) PodFitsResources(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node, lookupErr := r.info.GetNodeInfo(nodeName)
	if lookupErr != nil {
		return false, lookupErr
	}

	// TODO: move the following podNumber check to podFitsResourcesInternal when Kubelet allows podNumber check (See #20263).
	allocatable := node.Status.Allocatable
	podLimit := allocatable.Pods().Value()
	if int64(len(nodeInfo.Pods()))+1 > podLimit {
		current := int64(len(nodeInfo.Pods()))
		return false, newInsufficientResourceError(podCountResourceName, 1, current, podLimit)
	}

	return podFitsResourcesInternal(pod, nodeName, nodeInfo, node)
}
func NewResourceFitPredicate(info NodeInfo) algorithm.FitPredicate {
fit := &ResourceFit{
fit := &NodeStatus{
info: info,
}
return fit.PodFitsResources
}
func NewSelectorMatchPredicate(info NodeInfo) algorithm.FitPredicate {
selector := &NodeSelector{
selector := &NodeStatus{
info: info,
}
return selector.PodSelectorMatches
@@ -542,19 +548,25 @@ type NodeSelector struct {
info NodeInfo
}
func (n *NodeSelector) PodSelectorMatches(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo) (bool, error) {
func (n *NodeStatus) PodSelectorMatches(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo) (bool, error) {
node, err := n.info.GetNodeInfo(nodeName)
if err != nil {
return false, err
}
return PodMatchesNodeLabels(pod, node), nil
if PodMatchesNodeLabels(pod, node) {
return true, nil
}
return false, ErrNodeSelectorNotMatch
}
// PodFitsHost checks the pod's requested host against nodeName.
//
// A pod whose Spec.NodeName is empty fits every node. Otherwise the pod fits
// only when Spec.NodeName equals nodeName; a mismatch is reported as
// (false, ErrPodNotMatchHostName) so that callers receive the failure reason
// through the error, as every other predicate does. nodeInfo is not consulted.
func PodFitsHost(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	if len(pod.Spec.NodeName) == 0 || pod.Spec.NodeName == nodeName {
		return true, nil
	}
	// Previously an unconditional `return pod.Spec.NodeName == nodeName, nil`
	// preceded this point, which made the error below unreachable and let a
	// mismatch come back with a nil error.
	return false, ErrPodNotMatchHostName
}
type NodeLabelChecker struct {
@@ -594,7 +606,7 @@ func (n *NodeLabelChecker) CheckNodeLabelPresence(pod *api.Pod, nodeName string,
for _, label := range n.labels {
exists = nodeLabels.Has(label)
if (exists && !n.presence) || (!exists && n.presence) {
return false, nil
return false, ErrNodeLabelPresenceViolated
}
}
return true, nil
@@ -692,7 +704,10 @@ func (s *ServiceAffinity) CheckServiceAffinity(pod *api.Pod, nodeName string, no
}
// check if the node matches the selector
return affinitySelector.Matches(labels.Set(node.Labels)), nil
if affinitySelector.Matches(labels.Set(node.Labels)) {
return true, nil
}
return false, ErrServiceAffinityViolated
}
func PodFitsHostPorts(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo) (bool, error) {
@@ -706,7 +721,7 @@ func PodFitsHostPorts(pod *api.Pod, nodeName string, nodeInfo *schedulercache.No
continue
}
if existingPorts[wport] {
return false, nil
return false, ErrPodNotFitsHostPorts
}
}
return true, nil
@@ -735,3 +750,41 @@ func haveSame(a1, a2 []string) bool {
}
return false
}
// NodeStatus is the receiver for predicates that need to look up the node
// object by name before evaluating a pod against it.
type NodeStatus struct {
// info resolves a node name to its node object via GetNodeInfo.
info NodeInfo
}
// GeneralPredicates returns a FitPredicate backed by info: the returned
// function is the SchedulerGeneralPredicates method of a NodeStatus that uses
// info for node lookups.
func GeneralPredicates(info NodeInfo) algorithm.FitPredicate {
	return (&NodeStatus{info: info}).SchedulerGeneralPredicates
}
// SchedulerGeneralPredicates resolves nodeName to its node object and runs
// RunGeneralPredicates against it. If the lookup fails, the pod is reported as
// not fitting and the lookup error is returned unchanged.
func (n *NodeStatus) SchedulerGeneralPredicates(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	resolved, lookupErr := n.info.GetNodeInfo(nodeName)
	if lookupErr == nil {
		return RunGeneralPredicates(pod, nodeName, nodeInfo, resolved)
	}
	return false, lookupErr
}
// RunGeneralPredicates evaluates the pod against the node using, in order:
// podFitsResourcesInternal, PodFitsHost, PodFitsHostPorts and the node label
// match (PodMatchesNodeLabels). It stops at the first check that does not fit
// and returns that check's result and error; a label mismatch is reported as
// ErrNodeSelectorNotMatch. It returns (true, nil) only when every check passes.
func RunGeneralPredicates(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo, node *api.Node) (bool, error) {
	if ok, err := podFitsResourcesInternal(pod, nodeName, nodeInfo, node); !ok {
		return ok, err
	}

	if ok, err := PodFitsHost(pod, nodeName, nodeInfo); !ok {
		return ok, err
	}

	if ok, err := PodFitsHostPorts(pod, nodeName, nodeInfo); !ok {
		return ok, err
	}

	if PodMatchesNodeLabels(pod, node) {
		return true, nil
	}
	return false, ErrNodeSelectorNotMatch
}

View File

@@ -159,7 +159,7 @@ func TestPodFitsResources(t *testing.T) {
for _, test := range enoughPodsTests {
node := api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 32)}}
fit := ResourceFit{FakeNodeInfo(node)}
fit := NodeStatus{FakeNodeInfo(node)}
fits, err := fit.PodFitsResources(test.pod, "machine", test.nodeInfo)
if !reflect.DeepEqual(err, test.wErr) {
t.Errorf("%s: unexpected error: %v, want: %v", test.test, err, test.wErr)
@@ -204,7 +204,7 @@ func TestPodFitsResources(t *testing.T) {
for _, test := range notEnoughPodsTests {
node := api.Node{Status: api.NodeStatus{Capacity: api.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 1)}}
fit := ResourceFit{FakeNodeInfo(node)}
fit := NodeStatus{FakeNodeInfo(node)}
fits, err := fit.PodFitsResources(test.pod, "machine", test.nodeInfo)
if !reflect.DeepEqual(err, test.wErr) {
t.Errorf("%s: unexpected error: %v, want: %v", test.test, err, test.wErr)
@@ -252,11 +252,14 @@ func TestPodFitsHost(t *testing.T) {
for _, test := range tests {
result, err := PodFitsHost(test.pod, test.node, schedulercache.NewNodeInfo())
if err != nil {
if !reflect.DeepEqual(err, ErrPodNotMatchHostName) && err != nil {
t.Errorf("unexpected error: %v", err)
}
if result == false && !reflect.DeepEqual(err, ErrPodNotMatchHostName) {
t.Errorf("unexpected error: %v", err)
}
if result != test.fits {
t.Errorf("unexpected difference for %s: got: %v expected %v", test.test, test.fits, result)
t.Errorf("unexpected difference for %s: expected: %v got %v", test.test, test.fits, result)
}
}
}
@@ -322,7 +325,10 @@ func TestPodFitsHostPorts(t *testing.T) {
}
for _, test := range tests {
fits, err := PodFitsHostPorts(test.pod, "machine", test.nodeInfo)
if err != nil {
if !reflect.DeepEqual(err, ErrPodNotFitsHostPorts) && err != nil {
t.Errorf("unexpected error: %v", err)
}
if fits == false && !reflect.DeepEqual(err, ErrPodNotFitsHostPorts) {
t.Errorf("unexpected error: %v", err)
}
if test.fits != fits {
@@ -404,8 +410,11 @@ func TestDiskConflicts(t *testing.T) {
for _, test := range tests {
ok, err := NoDiskConflict(test.pod, "machine", test.nodeInfo)
if err != nil {
t.Fatalf("unexpected error: %v", err)
if !reflect.DeepEqual(err, ErrDiskConflict) && err != nil {
t.Errorf("unexpected error: %v", err)
}
if ok == false && !reflect.DeepEqual(err, ErrDiskConflict) {
t.Errorf("unexpected error: %v", err)
}
if test.isOk && !ok {
t.Errorf("expected ok, got none. %v %s %s", test.pod, test.nodeInfo, test.test)
@@ -453,8 +462,11 @@ func TestAWSDiskConflicts(t *testing.T) {
for _, test := range tests {
ok, err := NoDiskConflict(test.pod, "machine", test.nodeInfo)
if err != nil {
t.Fatalf("unexpected error: %v", err)
if !reflect.DeepEqual(err, ErrDiskConflict) && err != nil {
t.Errorf("unexpected error: %v", err)
}
if ok == false && !reflect.DeepEqual(err, ErrDiskConflict) {
t.Errorf("unexpected error: %v", err)
}
if test.isOk && !ok {
t.Errorf("expected ok, got none. %v %s %s", test.pod, test.nodeInfo, test.test)
@@ -508,8 +520,11 @@ func TestRBDDiskConflicts(t *testing.T) {
for _, test := range tests {
ok, err := NoDiskConflict(test.pod, "machine", test.nodeInfo)
if err != nil {
t.Fatalf("unexpected error: %v", err)
if !reflect.DeepEqual(err, ErrDiskConflict) && err != nil {
t.Errorf("unexpected error: %v", err)
}
if ok == false && !reflect.DeepEqual(err, ErrDiskConflict) {
t.Errorf("unexpected error: %v", err)
}
if test.isOk && !ok {
t.Errorf("expected ok, got none. %v %s %s", test.pod, test.nodeInfo, test.test)
@@ -980,9 +995,12 @@ func TestPodFitsSelector(t *testing.T) {
for _, test := range tests {
node := api.Node{ObjectMeta: api.ObjectMeta{Labels: test.labels}}
fit := NodeSelector{FakeNodeInfo(node)}
fit := NodeStatus{FakeNodeInfo(node)}
fits, err := fit.PodSelectorMatches(test.pod, "machine", schedulercache.NewNodeInfo())
if err != nil {
if !reflect.DeepEqual(err, ErrNodeSelectorNotMatch) && err != nil {
t.Errorf("unexpected error: %v", err)
}
if fits == false && !reflect.DeepEqual(err, ErrNodeSelectorNotMatch) {
t.Errorf("unexpected error: %v", err)
}
if fits != test.fits {
@@ -1041,7 +1059,10 @@ func TestNodeLabelPresence(t *testing.T) {
node := api.Node{ObjectMeta: api.ObjectMeta{Labels: label}}
labelChecker := NodeLabelChecker{FakeNodeInfo(node), test.labels, test.presence}
fits, err := labelChecker.CheckNodeLabelPresence(test.pod, "machine", schedulercache.NewNodeInfo())
if err != nil {
if !reflect.DeepEqual(err, ErrNodeLabelPresenceViolated) && err != nil {
t.Errorf("unexpected error: %v", err)
}
if fits == false && !reflect.DeepEqual(err, ErrNodeLabelPresenceViolated) {
t.Errorf("unexpected error: %v", err)
}
if fits != test.fits {
@@ -1181,7 +1202,10 @@ func TestServiceAffinity(t *testing.T) {
nodes := []api.Node{node1, node2, node3, node4, node5}
serviceAffinity := ServiceAffinity{algorithm.FakePodLister(test.pods), algorithm.FakeServiceLister(test.services), FakeNodeListInfo(nodes), test.labels}
fits, err := serviceAffinity.CheckServiceAffinity(test.pod, test.node, schedulercache.NewNodeInfo())
if err != nil {
if !reflect.DeepEqual(err, ErrServiceAffinityViolated) && err != nil {
t.Errorf("unexpected error: %v", err)
}
if fits == false && !reflect.DeepEqual(err, ErrServiceAffinityViolated) {
t.Errorf("unexpected error: %v", err)
}
if fits != test.fits {
@@ -1401,7 +1425,7 @@ func TestEBSVolumeCountConflicts(t *testing.T) {
for _, test := range tests {
pred := NewMaxPDVolumeCountPredicate(filter, test.maxVols, pvInfo, pvcInfo)
fits, err := pred(test.newPod, "some-node", schedulercache.NewNodeInfo(test.existingPods...))
if err != nil {
if err != nil && !reflect.DeepEqual(err, ErrMaxVolumeCountExceeded) {
t.Errorf("unexpected error: %v", err)
}
@@ -1455,3 +1479,83 @@ func TestPredicatesRegistered(t *testing.T) {
}
}
}
// newPodWithPort returns a pod with a single container that declares one
// ContainerPort per entry in hostPorts, in argument order, with HostPort set
// to that entry.
func newPodWithPort(hostPorts ...int) *api.Pod {
	ports := make([]api.ContainerPort, len(hostPorts))
	for i := range hostPorts {
		ports[i] = api.ContainerPort{HostPort: hostPorts[i]}
	}

	container := api.Container{Ports: ports}
	return &api.Pod{
		Spec: api.PodSpec{
			Containers: []api.Container{container},
		},
	}
}
// TestRunGeneralPredicates drives RunGeneralPredicates through a table of
// cases and checks both the returned fit flag and the returned error: a pod
// with no requests, a CPU shortfall, a host name mismatch, and a host port
// conflict.
func TestRunGeneralPredicates(t *testing.T) {
	// newNode builds a fresh node for each case; capacity and allocatable are
	// both derived from the same arguments.
	newNode := func() *api.Node {
		return &api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 32)}}
	}

	type generalCase struct {
		pod      *api.Pod
		nodeName string
		nodeInfo *schedulercache.NodeInfo
		node     *api.Node
		fits     bool
		test     string
		wErr     error
	}

	cases := []generalCase{
		{
			test:     "no resources/port/host requested always fits",
			pod:      &api.Pod{},
			nodeName: "machine1",
			nodeInfo: schedulercache.NewNodeInfo(
				newResourcePod(resourceRequest{milliCPU: 9, memory: 19})),
			node: newNode(),
			fits: true,
			wErr: nil,
		},
		{
			test:     "not enough cpu resource",
			pod:      newResourcePod(resourceRequest{milliCPU: 8, memory: 10}),
			nodeName: "machine1",
			nodeInfo: schedulercache.NewNodeInfo(
				newResourcePod(resourceRequest{milliCPU: 5, memory: 19})),
			node: newNode(),
			fits: false,
			wErr: newInsufficientResourceError("CPU", 8, 5, 10),
		},
		{
			test: "host not match",
			pod: &api.Pod{
				Spec: api.PodSpec{
					NodeName: "machine2",
				},
			},
			nodeName: "machine1",
			nodeInfo: schedulercache.NewNodeInfo(),
			node:     newNode(),
			fits:     false,
			wErr:     ErrPodNotMatchHostName,
		},
		{
			test:     "hostport conflict",
			pod:      newPodWithPort(123),
			nodeName: "machine1",
			nodeInfo: schedulercache.NewNodeInfo(newPodWithPort(123)),
			node:     newNode(),
			fits:     false,
			wErr:     ErrPodNotFitsHostPorts,
		},
	}

	for _, tc := range cases {
		fits, err := RunGeneralPredicates(tc.pod, tc.nodeName, tc.nodeInfo, tc.node)
		if !reflect.DeepEqual(err, tc.wErr) {
			t.Errorf("%s: unexpected error: %v, want: %v", tc.test, err, tc.wErr)
		}
		if fits != tc.fits {
			t.Errorf("%s: expected: %v got %v", tc.test, tc.fits, fits)
		}
	}
}

View File

@@ -23,6 +23,7 @@ import (
)
// FitPredicate is a function that indicates if a pod fits into an existing node.
// The failure information is given by the error.
type FitPredicate func(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo) (bool, error)
// PriorityFunction is the signature of a function that, given a pod, a map from
// node name to NodeInfo, and a NodeLister, returns a
// schedulerapi.HostPriorityList or an error.
// NOTE(review): presumably the list ranks the candidate nodes for the pod —
// confirm against the callers.
type PriorityFunction func(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister NodeLister) (schedulerapi.HostPriorityList, error)