mirror of
https://github.com/optim-enterprises-bv/kubernetes.git
synced 2025-10-31 18:28:13 +00:00
PredicateMetadata factory and optimization, Cleaned up some comments,
Comments addressed, Make emptyMetadataProducer a func to avoid casting, FakeSvcLister: remove error return for len(svc)=0. New test for predicatePrecomp to make method semantics explictly enforced when meta is missing. Precompute wrapper.
This commit is contained in:
@@ -36,6 +36,19 @@ import (
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
|
||||
)
|
||||
|
||||
// predicatePrecomputations: Helper types/variables...
|
||||
type PredicateMetadataModifier func(pm *predicateMetadata)
|
||||
|
||||
var predicatePrecomputeRegisterLock sync.Mutex
|
||||
var predicatePrecomputations map[string]PredicateMetadataModifier = make(map[string]PredicateMetadataModifier)
|
||||
|
||||
func RegisterPredicatePrecomputation(predicateName string, precomp PredicateMetadataModifier) {
|
||||
predicatePrecomputeRegisterLock.Lock()
|
||||
defer predicatePrecomputeRegisterLock.Unlock()
|
||||
predicatePrecomputations[predicateName] = precomp
|
||||
}
|
||||
|
||||
// Other types for predicate functions...
|
||||
type NodeInfo interface {
|
||||
GetNodeInfo(nodeID string) (*api.Node, error)
|
||||
}
|
||||
@@ -67,34 +80,21 @@ func (c *CachedNodeInfo) GetNodeInfo(id string) (*api.Node, error) {
|
||||
return node.(*api.Node), nil
|
||||
}
|
||||
|
||||
// predicateMetadata is a type that is passed as metadata for predicate functions
|
||||
type predicateMetadata struct {
|
||||
podBestEffort bool
|
||||
podRequest *schedulercache.Resource
|
||||
podPorts map[int]bool
|
||||
matchingAntiAffinityTerms []matchingPodAntiAffinityTerm
|
||||
}
|
||||
|
||||
// Note that predicateMetdata and matchingPodAntiAffinityTerm need to be declared in the same file
|
||||
// due to the way declarations are processed in predicate declaration unit tests.
|
||||
type matchingPodAntiAffinityTerm struct {
|
||||
term *api.PodAffinityTerm
|
||||
node *api.Node
|
||||
}
|
||||
|
||||
func PredicateMetadata(pod *api.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) interface{} {
|
||||
// If we cannot compute metadata, just return nil
|
||||
if pod == nil {
|
||||
return nil
|
||||
}
|
||||
matchingTerms, err := getMatchingAntiAffinityTerms(pod, nodeInfoMap)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return &predicateMetadata{
|
||||
podBestEffort: isPodBestEffort(pod),
|
||||
podRequest: GetResourceRequest(pod),
|
||||
podPorts: GetUsedPorts(pod),
|
||||
matchingAntiAffinityTerms: matchingTerms,
|
||||
}
|
||||
type predicateMetadata struct {
|
||||
pod *api.Pod
|
||||
podBestEffort bool
|
||||
podRequest *schedulercache.Resource
|
||||
podPorts map[int]bool
|
||||
matchingAntiAffinityTerms []matchingPodAntiAffinityTerm
|
||||
serviceAffinityMatchingPodList []*api.Pod
|
||||
serviceAffinityMatchingPodServices []*api.Service
|
||||
}
|
||||
|
||||
func isVolumeConflict(volume api.Volume, pod *api.Pod) bool {
|
||||
@@ -627,20 +627,42 @@ type ServiceAffinity struct {
|
||||
labels []string
|
||||
}
|
||||
|
||||
func NewServiceAffinityPredicate(podLister algorithm.PodLister, serviceLister algorithm.ServiceLister, nodeInfo NodeInfo, labels []string) algorithm.FitPredicate {
|
||||
// serviceAffinityPrecomputation should be run once by the scheduler before looping through the Predicate. It is a helper function that
|
||||
// only should be referenced by NewServiceAffinityPredicate.
|
||||
func (s *ServiceAffinity) serviceAffinityPrecomputation(pm *predicateMetadata) {
|
||||
if pm.pod == nil {
|
||||
glog.Errorf("Cannot precompute service affinity, a pod is required to caluculate service affinity.")
|
||||
return
|
||||
}
|
||||
|
||||
var errSvc, errList error
|
||||
// Store services which match the pod.
|
||||
pm.serviceAffinityMatchingPodServices, errSvc = s.serviceLister.GetPodServices(pm.pod)
|
||||
selector := CreateSelectorFromLabels(pm.pod.Labels)
|
||||
// consider only the pods that belong to the same namespace
|
||||
allMatches, errList := s.podLister.List(selector)
|
||||
|
||||
// In the future maybe we will return them as part of the function.
|
||||
if errSvc != nil || errList != nil {
|
||||
glog.Errorf("Some Error were found while precomputing svc affinity: \nservices:%v , \npods:%v", errSvc, errList)
|
||||
}
|
||||
pm.serviceAffinityMatchingPodList = FilterPodsByNamespace(allMatches, pm.pod.Namespace)
|
||||
}
|
||||
|
||||
func NewServiceAffinityPredicate(podLister algorithm.PodLister, serviceLister algorithm.ServiceLister, nodeInfo NodeInfo, labels []string) (algorithm.FitPredicate, PredicateMetadataModifier) {
|
||||
affinity := &ServiceAffinity{
|
||||
podLister: podLister,
|
||||
serviceLister: serviceLister,
|
||||
nodeInfo: nodeInfo,
|
||||
labels: labels,
|
||||
}
|
||||
return affinity.CheckServiceAffinity
|
||||
return affinity.checkServiceAffinity, affinity.serviceAffinityPrecomputation
|
||||
}
|
||||
|
||||
// The checkServiceAffinity predicate matches nodes in such a way to force that
|
||||
// ServiceAffinity.labels are homogenous for pods added to a node.
|
||||
// (i.e. it returns true IFF this pod can be added to this node, such
|
||||
// that all other pods in the same service are running on nodes w/
|
||||
// checkServiceAffinity is a predicate which matches nodes in such a way to force that
|
||||
// ServiceAffinity.labels are homogenous for pods that are scheduled to a node.
|
||||
// (i.e. it returns true IFF this pod can be added to this node such that all other pods in
|
||||
// the same service are running on nodes with
|
||||
// the exact same ServiceAffinity.label values).
|
||||
//
|
||||
// Details:
|
||||
@@ -650,46 +672,47 @@ func NewServiceAffinityPredicate(podLister algorithm.PodLister, serviceLister al
|
||||
// the match.
|
||||
// Otherwise:
|
||||
// Create an "implicit selector" which gaurantees pods will land on nodes with similar values
|
||||
// for the affinity labels.
|
||||
// for the affinity labels.
|
||||
//
|
||||
// To do this, we "reverse engineer" a selector by introspecting existing pods running under the same service+namespace.
|
||||
// These backfilled labels in the selector "L" are defined like so:
|
||||
// - L is a label that the ServiceAffinity object needs as a matching constraints.
|
||||
// - L is not defined in the pod itself already.
|
||||
// - and SOME pod, from a service, in the same namespace, ALREADY scheduled onto a node, has a matching value.
|
||||
func (s *ServiceAffinity) CheckServiceAffinity(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
|
||||
//
|
||||
// WARNING: This Predicate is NOT gauranteed to work if some of the predicateMetadata data isn't precomputed...
|
||||
// For that reason it is not exported, i.e. it is highlhy coupled to the implementation of the FitPredicate construction.
|
||||
func (s *ServiceAffinity) checkServiceAffinity(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
|
||||
var services []*api.Service
|
||||
var pods []*api.Pod
|
||||
if pm, ok := meta.(*predicateMetadata); ok && (pm.serviceAffinityMatchingPodList != nil || pm.serviceAffinityMatchingPodServices != nil) {
|
||||
services = pm.serviceAffinityMatchingPodServices
|
||||
pods = pm.serviceAffinityMatchingPodList
|
||||
} else {
|
||||
// Make the predicate resilient in case metadata is missing.
|
||||
pm = &predicateMetadata{pod: pod}
|
||||
s.serviceAffinityPrecomputation(pm)
|
||||
pods, services = pm.serviceAffinityMatchingPodList, pm.serviceAffinityMatchingPodServices
|
||||
}
|
||||
node := nodeInfo.Node()
|
||||
if node == nil {
|
||||
return false, nil, fmt.Errorf("node not found")
|
||||
}
|
||||
|
||||
// check if the pod being scheduled has the affinity labels specified in its NodeSelector
|
||||
affinityLabels := FindLabelsInSet(s.labels, labels.Set(pod.Spec.NodeSelector))
|
||||
|
||||
// Introspect services IFF we didn't predefine all the affinity labels in the pod itself.
|
||||
// Step 1: If we don't have all constraints, introspect nodes to find the missing constraints.
|
||||
if len(s.labels) > len(affinityLabels) {
|
||||
services, err := s.serviceLister.GetPodServices(pod)
|
||||
if err == nil && len(services) > 0 {
|
||||
// just use the first service and get the other pods within the service
|
||||
// TODO: a separate predicate can be created that tries to handle all services for the pod
|
||||
selector := labels.SelectorFromSet(services[0].Spec.Selector)
|
||||
servicePods, err := s.podLister.List(selector)
|
||||
if err != nil {
|
||||
return false, nil, err
|
||||
}
|
||||
// consider only the pods that belong to the same namespace
|
||||
nsServicePods := FilterPodsByNamespace(servicePods, pod.Namespace)
|
||||
if len(nsServicePods) > 0 {
|
||||
// consider any service pod and fetch the node its hosted on
|
||||
otherNode, err := s.nodeInfo.GetNodeInfo(nsServicePods[0].Spec.NodeName)
|
||||
if len(services) > 0 {
|
||||
if len(pods) > 0 {
|
||||
nodeWithAffinityLabels, err := s.nodeInfo.GetNodeInfo(pods[0].Spec.NodeName)
|
||||
if err != nil {
|
||||
return false, nil, err
|
||||
}
|
||||
AddUnsetLabelsToMap(affinityLabels, s.labels, labels.Set(otherNode.Labels))
|
||||
AddUnsetLabelsToMap(affinityLabels, s.labels, labels.Set(nodeWithAffinityLabels.Labels))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check if the node matches the selector
|
||||
// Step 2: Finally complete the affinity predicate based on whatever set of predicates we were able to find.
|
||||
if CreateSelectorFromLabels(affinityLabels).Matches(labels.Set(node.Labels)) {
|
||||
return true, nil, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user