	Move pkg/scheduler to plugin/pkg/scheduler
As the TODO in plugin/pkg/scheduler/scheduler.go described: move everything from pkg/scheduler into this package. Remove references from registry.
plugin/pkg/scheduler/algorithm/predicates/predicates.go (new file, 377 lines)
@@ -0,0 +1,377 @@
/*
Copyright 2014 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package predicates

import (
	"fmt"

	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
	"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm"
)

type NodeInfo interface {
	GetNodeInfo(nodeID string) (*api.Node, error)
}

type StaticNodeInfo struct {
	*api.NodeList
}

func (nodes StaticNodeInfo) GetNodeInfo(nodeID string) (*api.Node, error) {
	for ix := range nodes.Items {
		if nodes.Items[ix].Name == nodeID {
			return &nodes.Items[ix], nil
		}
	}
	return nil, fmt.Errorf("failed to find node: %s, %#v", nodeID, nodes)
}

type ClientNodeInfo struct {
	*client.Client
}

func (nodes ClientNodeInfo) GetNodeInfo(nodeID string) (*api.Node, error) {
	return nodes.Nodes().Get(nodeID)
}

func isVolumeConflict(volume api.Volume, pod *api.Pod) bool {
	if volume.GCEPersistentDisk != nil {
		pdName := volume.GCEPersistentDisk.PDName

		manifest := &(pod.Spec)
		for ix := range manifest.Volumes {
			if manifest.Volumes[ix].GCEPersistentDisk != nil &&
				manifest.Volumes[ix].GCEPersistentDisk.PDName == pdName {
				return true
			}
		}
	}
	if volume.AWSElasticBlockStore != nil {
		volumeID := volume.AWSElasticBlockStore.VolumeID

		manifest := &(pod.Spec)
		for ix := range manifest.Volumes {
			if manifest.Volumes[ix].AWSElasticBlockStore != nil &&
				manifest.Volumes[ix].AWSElasticBlockStore.VolumeID == volumeID {
				return true
			}
		}
	}
	return false
}

// NoDiskConflict evaluates if a pod can fit based on the volumes it requests, and those that
// are already mounted. Some types of volumes are mounted onto node machines. For now, these mounts
// are exclusive, so if a volume is already mounted on a node, another pod that uses the same volume
// can't schedule there. This is GCE and Amazon EBS specific for now.
// TODO: migrate this into some per-volume specific code?
func NoDiskConflict(pod *api.Pod, existingPods []*api.Pod, node string) (bool, error) {
	manifest := &(pod.Spec)
	for ix := range manifest.Volumes {
		for podIx := range existingPods {
			if isVolumeConflict(manifest.Volumes[ix], existingPods[podIx]) {
				return false, nil
			}
		}
	}
	return true, nil
}

type ResourceFit struct {
	info NodeInfo
}

type resourceRequest struct {
	milliCPU int64
	memory   int64
}

func getResourceRequest(pod *api.Pod) resourceRequest {
	result := resourceRequest{}
	for ix := range pod.Spec.Containers {
		limits := pod.Spec.Containers[ix].Resources.Limits
		result.memory += limits.Memory().Value()
		result.milliCPU += limits.Cpu().MilliValue()
	}
	return result
}

func CheckPodsExceedingCapacity(pods []*api.Pod, capacity api.ResourceList) (fitting []*api.Pod, notFitting []*api.Pod) {
	totalMilliCPU := capacity.Cpu().MilliValue()
	totalMemory := capacity.Memory().Value()
	milliCPURequested := int64(0)
	memoryRequested := int64(0)
	for _, pod := range pods {
		podRequest := getResourceRequest(pod)
		fitsCPU := totalMilliCPU == 0 || (totalMilliCPU-milliCPURequested) >= podRequest.milliCPU
		fitsMemory := totalMemory == 0 || (totalMemory-memoryRequested) >= podRequest.memory
		if !fitsCPU || !fitsMemory {
			// the pod doesn't fit
			notFitting = append(notFitting, pod)
			continue
		}
		// the pod fits
		milliCPURequested += podRequest.milliCPU
		memoryRequested += podRequest.memory
		fitting = append(fitting, pod)
	}
	return
}

// PodFitsResources calculates fit based on requested, rather than used resources
func (r *ResourceFit) PodFitsResources(pod *api.Pod, existingPods []*api.Pod, node string) (bool, error) {
	podRequest := getResourceRequest(pod)
	if podRequest.milliCPU == 0 && podRequest.memory == 0 {
		// no resources requested always fits.
		return true, nil
	}
	info, err := r.info.GetNodeInfo(node)
	if err != nil {
		return false, err
	}
	pods := make([]*api.Pod, 0, len(existingPods)+1) // copy so the caller's slice is not mutated
	pods = append(pods, existingPods...)
	pods = append(pods, pod)
	_, exceeding := CheckPodsExceedingCapacity(pods, info.Status.Capacity)
	if len(exceeding) > 0 {
		return false, nil
	}
	return true, nil
}

func NewResourceFitPredicate(info NodeInfo) algorithm.FitPredicate {
	fit := &ResourceFit{
		info: info,
	}
	return fit.PodFitsResources
}

func NewSelectorMatchPredicate(info NodeInfo) algorithm.FitPredicate {
	selector := &NodeSelector{
		info: info,
	}
	return selector.PodSelectorMatches
}

func PodMatchesNodeLabels(pod *api.Pod, node *api.Node) bool {
	if len(pod.Spec.NodeSelector) == 0 {
		return true
	}
	selector := labels.SelectorFromSet(pod.Spec.NodeSelector)
	return selector.Matches(labels.Set(node.Labels))
}

type NodeSelector struct {
	info NodeInfo
}

func (n *NodeSelector) PodSelectorMatches(pod *api.Pod, existingPods []*api.Pod, node string) (bool, error) {
	minion, err := n.info.GetNodeInfo(node)
	if err != nil {
		return false, err
	}
	return PodMatchesNodeLabels(pod, minion), nil
}

func PodFitsHost(pod *api.Pod, existingPods []*api.Pod, node string) (bool, error) {
	if len(pod.Spec.Host) == 0 {
		return true, nil
	}
	return pod.Spec.Host == node, nil
}

type NodeLabelChecker struct {
	info     NodeInfo
	labels   []string
	presence bool
}

func NewNodeLabelPredicate(info NodeInfo, labels []string, presence bool) algorithm.FitPredicate {
	labelChecker := &NodeLabelChecker{
		info:     info,
		labels:   labels,
		presence: presence,
	}
	return labelChecker.CheckNodeLabelPresence
}

// CheckNodeLabelPresence checks whether all of the specified labels exist on a minion, regardless of their value.
// If "presence" is false, it returns false if any of the requested labels matches any of the minion's labels,
// otherwise it returns true.
// If "presence" is true, it returns false if any of the requested labels does not match any of the minion's labels,
// otherwise it returns true.
//
// Consider the cases where the minions are placed in regions/zones/racks and these are identified by labels.
// In some cases, it is required that only minions that are part of ANY of the defined regions/zones/racks be selected.
//
// Alternately, eliminating minions that have a certain label, regardless of value, is also useful.
// A minion may have a label with "retiring" as the key and the date as the value,
// and it may be desirable to avoid scheduling new pods on this minion.
func (n *NodeLabelChecker) CheckNodeLabelPresence(pod *api.Pod, existingPods []*api.Pod, node string) (bool, error) {
	var exists bool
	minion, err := n.info.GetNodeInfo(node)
	if err != nil {
		return false, err
	}
	minionLabels := labels.Set(minion.Labels)
	for _, label := range n.labels {
		exists = minionLabels.Has(label)
		if (exists && !n.presence) || (!exists && n.presence) {
			return false, nil
		}
	}
	return true, nil
}

type ServiceAffinity struct {
	podLister     algorithm.PodLister
	serviceLister algorithm.ServiceLister
	nodeInfo      NodeInfo
	labels        []string
}

func NewServiceAffinityPredicate(podLister algorithm.PodLister, serviceLister algorithm.ServiceLister, nodeInfo NodeInfo, labels []string) algorithm.FitPredicate {
	affinity := &ServiceAffinity{
		podLister:     podLister,
		serviceLister: serviceLister,
		nodeInfo:      nodeInfo,
		labels:        labels,
	}
	return affinity.CheckServiceAffinity
}

// CheckServiceAffinity ensures that only the minions that match the specified labels are considered for scheduling.
// The set of labels to be considered is provided to the struct (ServiceAffinity).
// The pod is checked for the labels, and any missing labels are then checked on the minion
// that hosts the service pods (peers) for the given pod.
//
// We add an implicit selector requiring some particular value V for label L to a pod, if:
// - L is listed in the ServiceAffinity object that is passed into the function
// - the pod does not have any NodeSelector for L
// - some other pod from the same service is already scheduled onto a minion that has value V for label L
func (s *ServiceAffinity) CheckServiceAffinity(pod *api.Pod, existingPods []*api.Pod, node string) (bool, error) {
	var affinitySelector labels.Selector

	// check if the pod being scheduled has the affinity labels specified in its NodeSelector
	affinityLabels := map[string]string{}
	nodeSelector := labels.Set(pod.Spec.NodeSelector)
	labelsExist := true
	for _, l := range s.labels {
		if nodeSelector.Has(l) {
			affinityLabels[l] = nodeSelector.Get(l)
		} else {
			// the current pod does not specify all the labels, look in the existing service pods
			labelsExist = false
		}
	}

	// skip looking at other pods in the service if the current pod defines all the required affinity labels
	if !labelsExist {
		services, err := s.serviceLister.GetPodServices(pod)
		if err == nil {
			// just use the first service and get the other pods within the service
			// TODO: a separate predicate can be created that tries to handle all services for the pod
			selector := labels.SelectorFromSet(services[0].Spec.Selector)
			servicePods, err := s.podLister.List(selector)
			if err != nil {
				return false, err
			}
			// consider only the pods that belong to the same namespace
			nsServicePods := []*api.Pod{}
			for _, nsPod := range servicePods {
				if nsPod.Namespace == pod.Namespace {
					nsServicePods = append(nsServicePods, nsPod)
				}
			}
			if len(nsServicePods) > 0 {
				// consider any service pod and fetch the minion it is hosted on
				otherMinion, err := s.nodeInfo.GetNodeInfo(nsServicePods[0].Spec.Host)
				if err != nil {
					return false, err
				}
				for _, l := range s.labels {
					// If the pod being scheduled has the label value specified, do not override it
					if _, exists := affinityLabels[l]; exists {
						continue
					}
					if labels.Set(otherMinion.Labels).Has(l) {
						affinityLabels[l] = labels.Set(otherMinion.Labels).Get(l)
					}
				}
			}
		}
	}

	// if there are no existing pods in the service, consider all minions
	if len(affinityLabels) == 0 {
		affinitySelector = labels.Everything()
	} else {
		affinitySelector = labels.Set(affinityLabels).AsSelector()
	}

	minion, err := s.nodeInfo.GetNodeInfo(node)
	if err != nil {
		return false, err
	}

	// check if the minion matches the selector
	return affinitySelector.Matches(labels.Set(minion.Labels)), nil
}

func PodFitsPorts(pod *api.Pod, existingPods []*api.Pod, node string) (bool, error) {
	existingPorts := getUsedPorts(existingPods...)
	wantPorts := getUsedPorts(pod)
	for wport := range wantPorts {
		if wport == 0 {
			continue
		}
		if existingPorts[wport] {
			return false, nil
		}
	}
	return true, nil
}

func getUsedPorts(pods ...*api.Pod) map[int]bool {
	ports := make(map[int]bool)
	for _, pod := range pods {
		for _, container := range pod.Spec.Containers {
			for _, podPort := range container.Ports {
				ports[podPort.HostPort] = true
			}
		}
	}
	return ports
}

// MapPodsToMachines obtains a list of pods and pivots that list into a map where the keys are host names
// and the values are the lists of pods running on those hosts.
func MapPodsToMachines(lister algorithm.PodLister) (map[string][]*api.Pod, error) {
	machineToPods := map[string][]*api.Pod{}
	// TODO: perform more targeted query...
	pods, err := lister.List(labels.Everything())
	if err != nil {
		return map[string][]*api.Pod{}, err
	}
	for _, scheduledPod := range pods {
		host := scheduledPod.Spec.Host
		machineToPods[host] = append(machineToPods[host], scheduledPod)
	}
	return machineToPods, nil
}
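
For orientation, a minimal usage sketch follows. It is not part of the commit above, and the node name, labels, and selector values are made up for illustration; it shows how a caller might build a NodeInfo from a static node list, construct a FitPredicate with NewSelectorMatchPredicate, and evaluate it alongside PodFitsHost for a candidate node.

package main

import (
	"fmt"

	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
	"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
)

func main() {
	// A static view of the cluster: one node (hypothetical name and labels).
	node := api.Node{}
	node.Name = "node-1"
	node.Labels = map[string]string{"zone": "us-central1-a"}
	nodeInfo := predicates.StaticNodeInfo{NodeList: &api.NodeList{Items: []api.Node{node}}}

	// A pod whose NodeSelector asks for the zone label carried by node-1.
	pod := &api.Pod{}
	pod.Spec.NodeSelector = map[string]string{"zone": "us-central1-a"}

	// Build a FitPredicate and evaluate it for the candidate node.
	selectorMatches := predicates.NewSelectorMatchPredicate(nodeInfo)
	fits, err := selectorMatches(pod, []*api.Pod{}, "node-1")
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println("selector matches node-1:", fits) // true: the node carries the requested label

	// PodFitsHost is a plain predicate function; an empty Spec.Host fits any node.
	hostFits, _ := predicates.PodFitsHost(pod, []*api.Pod{}, "node-1")
	fmt.Println("host constraint satisfied:", hostFits) // true
}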