mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-03 19:58:17 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			744 lines
		
	
	
		
			27 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			744 lines
		
	
	
		
			27 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
/*
 | 
						|
Copyright 2014 The Kubernetes Authors.
 | 
						|
 | 
						|
Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
you may not use this file except in compliance with the License.
 | 
						|
You may obtain a copy of the License at
 | 
						|
 | 
						|
    http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
 | 
						|
Unless required by applicable law or agreed to in writing, software
 | 
						|
distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
See the License for the specific language governing permissions and
 | 
						|
limitations under the License.
 | 
						|
*/
 | 
						|
 | 
						|
package endpoint
 | 
						|
 | 
						|
import (
 | 
						|
	"context"
 | 
						|
	"fmt"
 | 
						|
	"math"
 | 
						|
	"strconv"
 | 
						|
	"time"
 | 
						|
 | 
						|
	v1 "k8s.io/api/core/v1"
 | 
						|
	apiequality "k8s.io/apimachinery/pkg/api/equality"
 | 
						|
	"k8s.io/apimachinery/pkg/api/errors"
 | 
						|
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 | 
						|
	"k8s.io/apimachinery/pkg/labels"
 | 
						|
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 | 
						|
	"k8s.io/apimachinery/pkg/util/wait"
 | 
						|
	coreinformers "k8s.io/client-go/informers/core/v1"
 | 
						|
	clientset "k8s.io/client-go/kubernetes"
 | 
						|
	"k8s.io/client-go/kubernetes/scheme"
 | 
						|
	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
 | 
						|
	corelisters "k8s.io/client-go/listers/core/v1"
 | 
						|
	"k8s.io/client-go/tools/cache"
 | 
						|
	"k8s.io/client-go/tools/leaderelection/resourcelock"
 | 
						|
	"k8s.io/client-go/tools/record"
 | 
						|
	"k8s.io/client-go/util/workqueue"
 | 
						|
	"k8s.io/component-base/metrics/prometheus/ratelimiter"
 | 
						|
	"k8s.io/klog/v2"
 | 
						|
	"k8s.io/kubernetes/pkg/api/v1/endpoints"
 | 
						|
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
 | 
						|
	api "k8s.io/kubernetes/pkg/apis/core"
 | 
						|
	helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
 | 
						|
	"k8s.io/kubernetes/pkg/controller"
 | 
						|
	endpointutil "k8s.io/kubernetes/pkg/controller/util/endpoint"
 | 
						|
	utillabels "k8s.io/kubernetes/pkg/util/labels"
 | 
						|
	utilnet "k8s.io/utils/net"
 | 
						|
)
 | 
						|
 | 
						|
const (
	// maxRetries caps how many times a failing service key is re-queued before
	// it is dropped from the work queue. With the current rate-limiter in use
	// (5ms*2^(maxRetries-1)), successive re-queues of one service are spaced by:
	//
	// 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s, 10.2s, 20.4s, 41s, 82s
	maxRetries = 15

	// maxCapacity is the maximum number of addresses (ready plus not-ready)
	// that should be stored in a single Endpoints resource. An Endpoints
	// object holding more addresses than this is always considered out of
	// date by capacityAnnotationSetCorrectly.
	maxCapacity = 1000

	// TolerateUnreadyEndpointsAnnotation is a Service annotation telling the
	// endpoints controller to publish endpoints for pods that are not ready.
	// It is currently only used by StatefulSets, whose pods must be DNS
	// resolvable during initialization and termination. For that purpose a
	// headless Service is created just for the StatefulSet; clients should
	// not use that Service for anything else, so unready endpoints do not
	// matter. Endpoints of such Services keep their DNS records and keep
	// receiving traffic from the moment the kubelet has started all
	// containers in the pod and marked it "Running" until the kubelet stops
	// all containers and deletes the pod from the apiserver.
	//
	// This field is deprecated. v1.Service.PublishNotReadyAddresses will
	// replace it in subsequent releases. It will be removed no sooner than 1.13.
	TolerateUnreadyEndpointsAnnotation = "service.alpha.kubernetes.io/tolerate-unready-endpoints"

	// truncated is a possible value of the
	// `endpoints.kubernetes.io/over-capacity` annotation on an Endpoints
	// resource. It indicates that the number of endpoints has been truncated
	// to maxCapacity.
	truncated = "truncated"
)
 | 
						|
 | 
						|
// NewEndpointController returns a new *Controller.
 | 
						|
func NewEndpointController(podInformer coreinformers.PodInformer, serviceInformer coreinformers.ServiceInformer,
 | 
						|
	endpointsInformer coreinformers.EndpointsInformer, client clientset.Interface, endpointUpdatesBatchPeriod time.Duration) *Controller {
 | 
						|
	broadcaster := record.NewBroadcaster()
 | 
						|
	broadcaster.StartStructuredLogging(0)
 | 
						|
	broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: client.CoreV1().Events("")})
 | 
						|
	recorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "endpoint-controller"})
 | 
						|
 | 
						|
	if client != nil && client.CoreV1().RESTClient().GetRateLimiter() != nil {
 | 
						|
		ratelimiter.RegisterMetricAndTrackRateLimiterUsage("endpoint_controller", client.CoreV1().RESTClient().GetRateLimiter())
 | 
						|
	}
 | 
						|
	e := &Controller{
 | 
						|
		client:           client,
 | 
						|
		queue:            workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "endpoint"),
 | 
						|
		workerLoopPeriod: time.Second,
 | 
						|
	}
 | 
						|
 | 
						|
	serviceInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
 | 
						|
		AddFunc: e.onServiceUpdate,
 | 
						|
		UpdateFunc: func(old, cur interface{}) {
 | 
						|
			e.onServiceUpdate(cur)
 | 
						|
		},
 | 
						|
		DeleteFunc: e.onServiceDelete,
 | 
						|
	})
 | 
						|
	e.serviceLister = serviceInformer.Lister()
 | 
						|
	e.servicesSynced = serviceInformer.Informer().HasSynced
 | 
						|
 | 
						|
	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
 | 
						|
		AddFunc:    e.addPod,
 | 
						|
		UpdateFunc: e.updatePod,
 | 
						|
		DeleteFunc: e.deletePod,
 | 
						|
	})
 | 
						|
	e.podLister = podInformer.Lister()
 | 
						|
	e.podsSynced = podInformer.Informer().HasSynced
 | 
						|
 | 
						|
	endpointsInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
 | 
						|
		DeleteFunc: e.onEndpointsDelete,
 | 
						|
	})
 | 
						|
	e.endpointsLister = endpointsInformer.Lister()
 | 
						|
	e.endpointsSynced = endpointsInformer.Informer().HasSynced
 | 
						|
 | 
						|
	e.triggerTimeTracker = endpointutil.NewTriggerTimeTracker()
 | 
						|
	e.eventBroadcaster = broadcaster
 | 
						|
	e.eventRecorder = recorder
 | 
						|
 | 
						|
	e.endpointUpdatesBatchPeriod = endpointUpdatesBatchPeriod
 | 
						|
 | 
						|
	e.serviceSelectorCache = endpointutil.NewServiceSelectorCache()
 | 
						|
 | 
						|
	return e
 | 
						|
}
 | 
						|
 | 
						|
// Controller manages selector-based service endpoints.
 | 
						|
type Controller struct {
 | 
						|
	client           clientset.Interface
 | 
						|
	eventBroadcaster record.EventBroadcaster
 | 
						|
	eventRecorder    record.EventRecorder
 | 
						|
 | 
						|
	// serviceLister is able to list/get services and is populated by the shared informer passed to
 | 
						|
	// NewEndpointController.
 | 
						|
	serviceLister corelisters.ServiceLister
 | 
						|
	// servicesSynced returns true if the service shared informer has been synced at least once.
 | 
						|
	// Added as a member to the struct to allow injection for testing.
 | 
						|
	servicesSynced cache.InformerSynced
 | 
						|
 | 
						|
	// podLister is able to list/get pods and is populated by the shared informer passed to
 | 
						|
	// NewEndpointController.
 | 
						|
	podLister corelisters.PodLister
 | 
						|
	// podsSynced returns true if the pod shared informer has been synced at least once.
 | 
						|
	// Added as a member to the struct to allow injection for testing.
 | 
						|
	podsSynced cache.InformerSynced
 | 
						|
 | 
						|
	// endpointsLister is able to list/get endpoints and is populated by the shared informer passed to
 | 
						|
	// NewEndpointController.
 | 
						|
	endpointsLister corelisters.EndpointsLister
 | 
						|
	// endpointsSynced returns true if the endpoints shared informer has been synced at least once.
 | 
						|
	// Added as a member to the struct to allow injection for testing.
 | 
						|
	endpointsSynced cache.InformerSynced
 | 
						|
 | 
						|
	// Services that need to be updated. A channel is inappropriate here,
 | 
						|
	// because it allows services with lots of pods to be serviced much
 | 
						|
	// more often than services with few pods; it also would cause a
 | 
						|
	// service that's inserted multiple times to be processed more than
 | 
						|
	// necessary.
 | 
						|
	queue workqueue.RateLimitingInterface
 | 
						|
 | 
						|
	// workerLoopPeriod is the time between worker runs. The workers process the queue of service and pod changes.
 | 
						|
	workerLoopPeriod time.Duration
 | 
						|
 | 
						|
	// triggerTimeTracker is an util used to compute and export the EndpointsLastChangeTriggerTime
 | 
						|
	// annotation.
 | 
						|
	triggerTimeTracker *endpointutil.TriggerTimeTracker
 | 
						|
 | 
						|
	endpointUpdatesBatchPeriod time.Duration
 | 
						|
 | 
						|
	// serviceSelectorCache is a cache of service selectors to avoid high CPU consumption caused by frequent calls
 | 
						|
	// to AsSelectorPreValidated (see #73527)
 | 
						|
	serviceSelectorCache *endpointutil.ServiceSelectorCache
 | 
						|
}
 | 
						|
 | 
						|
// Run will not return until stopCh is closed. workers determines how many
 | 
						|
// endpoints will be handled in parallel.
 | 
						|
func (e *Controller) Run(ctx context.Context, workers int) {
 | 
						|
	defer utilruntime.HandleCrash()
 | 
						|
	defer e.queue.ShutDown()
 | 
						|
 | 
						|
	klog.Infof("Starting endpoint controller")
 | 
						|
	defer klog.Infof("Shutting down endpoint controller")
 | 
						|
 | 
						|
	if !cache.WaitForNamedCacheSync("endpoint", ctx.Done(), e.podsSynced, e.servicesSynced, e.endpointsSynced) {
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	for i := 0; i < workers; i++ {
 | 
						|
		go wait.UntilWithContext(ctx, e.worker, e.workerLoopPeriod)
 | 
						|
	}
 | 
						|
 | 
						|
	go func() {
 | 
						|
		defer utilruntime.HandleCrash()
 | 
						|
		e.checkLeftoverEndpoints()
 | 
						|
	}()
 | 
						|
 | 
						|
	<-ctx.Done()
 | 
						|
}
 | 
						|
 | 
						|
// When a pod is added, figure out what services it will be a member of and
 | 
						|
// enqueue them. obj must have *v1.Pod type.
 | 
						|
func (e *Controller) addPod(obj interface{}) {
 | 
						|
	pod := obj.(*v1.Pod)
 | 
						|
	services, err := e.serviceSelectorCache.GetPodServiceMemberships(e.serviceLister, pod)
 | 
						|
	if err != nil {
 | 
						|
		utilruntime.HandleError(fmt.Errorf("Unable to get pod %s/%s's service memberships: %v", pod.Namespace, pod.Name, err))
 | 
						|
		return
 | 
						|
	}
 | 
						|
	for key := range services {
 | 
						|
		e.queue.AddAfter(key, e.endpointUpdatesBatchPeriod)
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func podToEndpointAddressForService(svc *v1.Service, pod *v1.Pod) (*v1.EndpointAddress, error) {
 | 
						|
	var endpointIP string
 | 
						|
	ipFamily := v1.IPv4Protocol
 | 
						|
 | 
						|
	if len(svc.Spec.IPFamilies) > 0 {
 | 
						|
		// controller is connected to an api-server that correctly sets IPFamilies
 | 
						|
		ipFamily = svc.Spec.IPFamilies[0] // this works for headful and headless
 | 
						|
	} else {
 | 
						|
		// controller is connected to an api server that does not correctly
 | 
						|
		// set IPFamilies (e.g. old api-server during an upgrade)
 | 
						|
		// TODO (khenidak): remove by when the possibility of upgrading
 | 
						|
		// from a cluster that does not support dual stack is nil
 | 
						|
		if len(svc.Spec.ClusterIP) > 0 && svc.Spec.ClusterIP != v1.ClusterIPNone {
 | 
						|
			// headful service. detect via service clusterIP
 | 
						|
			if utilnet.IsIPv6String(svc.Spec.ClusterIP) {
 | 
						|
				ipFamily = v1.IPv6Protocol
 | 
						|
			}
 | 
						|
		} else {
 | 
						|
			// Since this is a headless service we use podIP to identify the family.
 | 
						|
			// This assumes that status.PodIP is assigned correctly (follows pod cidr and
 | 
						|
			// pod cidr list order is same as service cidr list order). The expectation is
 | 
						|
			// this is *most probably* the case.
 | 
						|
 | 
						|
			// if the family was incorrectly identified then this will be corrected once the
 | 
						|
			// the upgrade is completed (controller connects to api-server that correctly defaults services)
 | 
						|
			if utilnet.IsIPv6String(pod.Status.PodIP) {
 | 
						|
				ipFamily = v1.IPv6Protocol
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	// find an ip that matches the family
 | 
						|
	for _, podIP := range pod.Status.PodIPs {
 | 
						|
		if (ipFamily == v1.IPv6Protocol) == utilnet.IsIPv6String(podIP.IP) {
 | 
						|
			endpointIP = podIP.IP
 | 
						|
			break
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	if endpointIP == "" {
 | 
						|
		return nil, fmt.Errorf("failed to find a matching endpoint for service %v", svc.Name)
 | 
						|
	}
 | 
						|
 | 
						|
	return &v1.EndpointAddress{
 | 
						|
		IP:       endpointIP,
 | 
						|
		NodeName: &pod.Spec.NodeName,
 | 
						|
		TargetRef: &v1.ObjectReference{
 | 
						|
			Kind:            "Pod",
 | 
						|
			Namespace:       pod.ObjectMeta.Namespace,
 | 
						|
			Name:            pod.ObjectMeta.Name,
 | 
						|
			UID:             pod.ObjectMeta.UID,
 | 
						|
			ResourceVersion: pod.ObjectMeta.ResourceVersion,
 | 
						|
		},
 | 
						|
	}, nil
 | 
						|
}
 | 
						|
 | 
						|
// When a pod is updated, figure out what services it used to be a member of
 | 
						|
// and what services it will be a member of, and enqueue the union of these.
 | 
						|
// old and cur must be *v1.Pod types.
 | 
						|
func (e *Controller) updatePod(old, cur interface{}) {
 | 
						|
	services := endpointutil.GetServicesToUpdateOnPodChange(e.serviceLister, e.serviceSelectorCache, old, cur)
 | 
						|
	for key := range services {
 | 
						|
		e.queue.AddAfter(key, e.endpointUpdatesBatchPeriod)
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// When a pod is deleted, enqueue the services the pod used to be a member of.
 | 
						|
// obj could be an *v1.Pod, or a DeletionFinalStateUnknown marker item.
 | 
						|
func (e *Controller) deletePod(obj interface{}) {
 | 
						|
	pod := endpointutil.GetPodFromDeleteAction(obj)
 | 
						|
	if pod != nil {
 | 
						|
		e.addPod(pod)
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// onServiceUpdate updates the Service Selector in the cache and queues the Service for processing.
 | 
						|
func (e *Controller) onServiceUpdate(obj interface{}) {
 | 
						|
	key, err := controller.KeyFunc(obj)
 | 
						|
	if err != nil {
 | 
						|
		utilruntime.HandleError(fmt.Errorf("Couldn't get key for object %+v: %v", obj, err))
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	_ = e.serviceSelectorCache.Update(key, obj.(*v1.Service).Spec.Selector)
 | 
						|
	e.queue.Add(key)
 | 
						|
}
 | 
						|
 | 
						|
// onServiceDelete removes the Service Selector from the cache and queues the Service for processing.
 | 
						|
func (e *Controller) onServiceDelete(obj interface{}) {
 | 
						|
	key, err := controller.KeyFunc(obj)
 | 
						|
	if err != nil {
 | 
						|
		utilruntime.HandleError(fmt.Errorf("Couldn't get key for object %+v: %v", obj, err))
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	e.serviceSelectorCache.Delete(key)
 | 
						|
	e.queue.Add(key)
 | 
						|
}
 | 
						|
 | 
						|
func (e *Controller) onEndpointsDelete(obj interface{}) {
 | 
						|
	key, err := controller.KeyFunc(obj)
 | 
						|
	if err != nil {
 | 
						|
		utilruntime.HandleError(fmt.Errorf("Couldn't get key for object %+v: %v", obj, err))
 | 
						|
		return
 | 
						|
	}
 | 
						|
	e.queue.Add(key)
 | 
						|
}
 | 
						|
 | 
						|
// worker runs a worker thread that just dequeues items, processes them, and
 | 
						|
// marks them done. You may run as many of these in parallel as you wish; the
 | 
						|
// workqueue guarantees that they will not end up processing the same service
 | 
						|
// at the same time.
 | 
						|
func (e *Controller) worker(ctx context.Context) {
 | 
						|
	for e.processNextWorkItem(ctx) {
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (e *Controller) processNextWorkItem(ctx context.Context) bool {
 | 
						|
	eKey, quit := e.queue.Get()
 | 
						|
	if quit {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
	defer e.queue.Done(eKey)
 | 
						|
 | 
						|
	err := e.syncService(ctx, eKey.(string))
 | 
						|
	e.handleErr(err, eKey)
 | 
						|
 | 
						|
	return true
 | 
						|
}
 | 
						|
 | 
						|
func (e *Controller) handleErr(err error, key interface{}) {
 | 
						|
	if err == nil {
 | 
						|
		e.queue.Forget(key)
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	ns, name, keyErr := cache.SplitMetaNamespaceKey(key.(string))
 | 
						|
	if keyErr != nil {
 | 
						|
		klog.ErrorS(err, "Failed to split meta namespace cache key", "key", key)
 | 
						|
	}
 | 
						|
 | 
						|
	if e.queue.NumRequeues(key) < maxRetries {
 | 
						|
		klog.V(2).InfoS("Error syncing endpoints, retrying", "service", klog.KRef(ns, name), "err", err)
 | 
						|
		e.queue.AddRateLimited(key)
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	klog.Warningf("Dropping service %q out of the queue: %v", key, err)
 | 
						|
	e.queue.Forget(key)
 | 
						|
	utilruntime.HandleError(err)
 | 
						|
}
 | 
						|
 | 
						|
func (e *Controller) syncService(ctx context.Context, key string) error {
 | 
						|
	startTime := time.Now()
 | 
						|
	defer func() {
 | 
						|
		klog.V(4).Infof("Finished syncing service %q endpoints. (%v)", key, time.Since(startTime))
 | 
						|
	}()
 | 
						|
 | 
						|
	namespace, name, err := cache.SplitMetaNamespaceKey(key)
 | 
						|
	if err != nil {
 | 
						|
		return err
 | 
						|
	}
 | 
						|
	service, err := e.serviceLister.Services(namespace).Get(name)
 | 
						|
	if err != nil {
 | 
						|
		if !errors.IsNotFound(err) {
 | 
						|
			return err
 | 
						|
		}
 | 
						|
 | 
						|
		// Delete the corresponding endpoint, as the service has been deleted.
 | 
						|
		// TODO: Please note that this will delete an endpoint when a
 | 
						|
		// service is deleted. However, if we're down at the time when
 | 
						|
		// the service is deleted, we will miss that deletion, so this
 | 
						|
		// doesn't completely solve the problem. See #6877.
 | 
						|
		err = e.client.CoreV1().Endpoints(namespace).Delete(ctx, name, metav1.DeleteOptions{})
 | 
						|
		if err != nil && !errors.IsNotFound(err) {
 | 
						|
			return err
 | 
						|
		}
 | 
						|
		e.triggerTimeTracker.DeleteService(namespace, name)
 | 
						|
		return nil
 | 
						|
	}
 | 
						|
 | 
						|
	if service.Spec.Selector == nil {
 | 
						|
		// services without a selector receive no endpoints from this controller;
 | 
						|
		// these services will receive the endpoints that are created out-of-band via the REST API.
 | 
						|
		return nil
 | 
						|
	}
 | 
						|
 | 
						|
	klog.V(5).Infof("About to update endpoints for service %q", key)
 | 
						|
	pods, err := e.podLister.Pods(service.Namespace).List(labels.Set(service.Spec.Selector).AsSelectorPreValidated())
 | 
						|
	if err != nil {
 | 
						|
		// Since we're getting stuff from a local cache, it is
 | 
						|
		// basically impossible to get this error.
 | 
						|
		return err
 | 
						|
	}
 | 
						|
 | 
						|
	// If the user specified the older (deprecated) annotation, we have to respect it.
 | 
						|
	tolerateUnreadyEndpoints := service.Spec.PublishNotReadyAddresses
 | 
						|
	if v, ok := service.Annotations[TolerateUnreadyEndpointsAnnotation]; ok {
 | 
						|
		b, err := strconv.ParseBool(v)
 | 
						|
		if err == nil {
 | 
						|
			tolerateUnreadyEndpoints = b
 | 
						|
		} else {
 | 
						|
			utilruntime.HandleError(fmt.Errorf("Failed to parse annotation %v: %v", TolerateUnreadyEndpointsAnnotation, err))
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	// We call ComputeEndpointLastChangeTriggerTime here to make sure that the
 | 
						|
	// state of the trigger time tracker gets updated even if the sync turns out
 | 
						|
	// to be no-op and we don't update the endpoints object.
 | 
						|
	endpointsLastChangeTriggerTime := e.triggerTimeTracker.
 | 
						|
		ComputeEndpointLastChangeTriggerTime(namespace, service, pods)
 | 
						|
 | 
						|
	subsets := []v1.EndpointSubset{}
 | 
						|
	var totalReadyEps int
 | 
						|
	var totalNotReadyEps int
 | 
						|
 | 
						|
	for _, pod := range pods {
 | 
						|
		if len(pod.Status.PodIP) == 0 {
 | 
						|
			klog.V(5).Infof("Failed to find an IP for pod %s/%s", pod.Namespace, pod.Name)
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		if !tolerateUnreadyEndpoints && pod.DeletionTimestamp != nil {
 | 
						|
			klog.V(5).Infof("Pod is being deleted %s/%s", pod.Namespace, pod.Name)
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		ep, err := podToEndpointAddressForService(service, pod)
 | 
						|
		if err != nil {
 | 
						|
			// this will happen, if the cluster runs with some nodes configured as dual stack and some as not
 | 
						|
			// such as the case of an upgrade..
 | 
						|
			klog.V(2).Infof("failed to find endpoint for service:%v with ClusterIP:%v on pod:%v with error:%v", service.Name, service.Spec.ClusterIP, pod.Name, err)
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		epa := *ep
 | 
						|
		if endpointutil.ShouldSetHostname(pod, service) {
 | 
						|
			epa.Hostname = pod.Spec.Hostname
 | 
						|
		}
 | 
						|
 | 
						|
		// Allow headless service not to have ports.
 | 
						|
		if len(service.Spec.Ports) == 0 {
 | 
						|
			if service.Spec.ClusterIP == api.ClusterIPNone {
 | 
						|
				subsets, totalReadyEps, totalNotReadyEps = addEndpointSubset(subsets, pod, epa, nil, tolerateUnreadyEndpoints)
 | 
						|
				// No need to repack subsets for headless service without ports.
 | 
						|
			}
 | 
						|
		} else {
 | 
						|
			for i := range service.Spec.Ports {
 | 
						|
				servicePort := &service.Spec.Ports[i]
 | 
						|
				portNum, err := podutil.FindPort(pod, servicePort)
 | 
						|
				if err != nil {
 | 
						|
					klog.V(4).Infof("Failed to find port for service %s/%s: %v", service.Namespace, service.Name, err)
 | 
						|
					continue
 | 
						|
				}
 | 
						|
				epp := endpointPortFromServicePort(servicePort, portNum)
 | 
						|
 | 
						|
				var readyEps, notReadyEps int
 | 
						|
				subsets, readyEps, notReadyEps = addEndpointSubset(subsets, pod, epa, epp, tolerateUnreadyEndpoints)
 | 
						|
				totalReadyEps = totalReadyEps + readyEps
 | 
						|
				totalNotReadyEps = totalNotReadyEps + notReadyEps
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
	subsets = endpoints.RepackSubsets(subsets)
 | 
						|
 | 
						|
	// See if there's actually an update here.
 | 
						|
	currentEndpoints, err := e.endpointsLister.Endpoints(service.Namespace).Get(service.Name)
 | 
						|
	if err != nil {
 | 
						|
		if errors.IsNotFound(err) {
 | 
						|
			currentEndpoints = &v1.Endpoints{
 | 
						|
				ObjectMeta: metav1.ObjectMeta{
 | 
						|
					Name:   service.Name,
 | 
						|
					Labels: service.Labels,
 | 
						|
				},
 | 
						|
			}
 | 
						|
		} else {
 | 
						|
			return err
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	createEndpoints := len(currentEndpoints.ResourceVersion) == 0
 | 
						|
 | 
						|
	// Compare the sorted subsets and labels
 | 
						|
	// Remove the HeadlessService label from the endpoints if it exists,
 | 
						|
	// as this won't be set on the service itself
 | 
						|
	// and will cause a false negative in this diff check.
 | 
						|
	// But first check if it has that label to avoid expensive copies.
 | 
						|
	compareLabels := currentEndpoints.Labels
 | 
						|
	if _, ok := currentEndpoints.Labels[v1.IsHeadlessService]; ok {
 | 
						|
		compareLabels = utillabels.CloneAndRemoveLabel(currentEndpoints.Labels, v1.IsHeadlessService)
 | 
						|
	}
 | 
						|
	if !createEndpoints &&
 | 
						|
		apiequality.Semantic.DeepEqual(currentEndpoints.Subsets, subsets) &&
 | 
						|
		apiequality.Semantic.DeepEqual(compareLabels, service.Labels) &&
 | 
						|
		capacityAnnotationSetCorrectly(currentEndpoints.Annotations, currentEndpoints.Subsets) {
 | 
						|
		klog.V(5).Infof("endpoints are equal for %s/%s, skipping update", service.Namespace, service.Name)
 | 
						|
		return nil
 | 
						|
	}
 | 
						|
	newEndpoints := currentEndpoints.DeepCopy()
 | 
						|
	newEndpoints.Subsets = subsets
 | 
						|
	newEndpoints.Labels = service.Labels
 | 
						|
	if newEndpoints.Annotations == nil {
 | 
						|
		newEndpoints.Annotations = make(map[string]string)
 | 
						|
	}
 | 
						|
 | 
						|
	if !endpointsLastChangeTriggerTime.IsZero() {
 | 
						|
		newEndpoints.Annotations[v1.EndpointsLastChangeTriggerTime] =
 | 
						|
			endpointsLastChangeTriggerTime.UTC().Format(time.RFC3339Nano)
 | 
						|
	} else { // No new trigger time, clear the annotation.
 | 
						|
		delete(newEndpoints.Annotations, v1.EndpointsLastChangeTriggerTime)
 | 
						|
	}
 | 
						|
 | 
						|
	if truncateEndpoints(newEndpoints) {
 | 
						|
		newEndpoints.Annotations[v1.EndpointsOverCapacity] = truncated
 | 
						|
	} else {
 | 
						|
		delete(newEndpoints.Annotations, v1.EndpointsOverCapacity)
 | 
						|
	}
 | 
						|
 | 
						|
	if newEndpoints.Labels == nil {
 | 
						|
		newEndpoints.Labels = make(map[string]string)
 | 
						|
	}
 | 
						|
 | 
						|
	if !helper.IsServiceIPSet(service) {
 | 
						|
		newEndpoints.Labels = utillabels.CloneAndAddLabel(newEndpoints.Labels, v1.IsHeadlessService, "")
 | 
						|
	} else {
 | 
						|
		newEndpoints.Labels = utillabels.CloneAndRemoveLabel(newEndpoints.Labels, v1.IsHeadlessService)
 | 
						|
	}
 | 
						|
 | 
						|
	klog.V(4).Infof("Update endpoints for %v/%v, ready: %d not ready: %d", service.Namespace, service.Name, totalReadyEps, totalNotReadyEps)
 | 
						|
	if createEndpoints {
 | 
						|
		// No previous endpoints, create them
 | 
						|
		_, err = e.client.CoreV1().Endpoints(service.Namespace).Create(ctx, newEndpoints, metav1.CreateOptions{})
 | 
						|
	} else {
 | 
						|
		// Pre-existing
 | 
						|
		_, err = e.client.CoreV1().Endpoints(service.Namespace).Update(ctx, newEndpoints, metav1.UpdateOptions{})
 | 
						|
	}
 | 
						|
	if err != nil {
 | 
						|
		if createEndpoints && errors.IsForbidden(err) {
 | 
						|
			// A request is forbidden primarily for two reasons:
 | 
						|
			// 1. namespace is terminating, endpoint creation is not allowed by default.
 | 
						|
			// 2. policy is misconfigured, in which case no service would function anywhere.
 | 
						|
			// Given the frequency of 1, we log at a lower level.
 | 
						|
			klog.V(5).Infof("Forbidden from creating endpoints: %v", err)
 | 
						|
 | 
						|
			// If the namespace is terminating, creates will continue to fail. Simply drop the item.
 | 
						|
			if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
 | 
						|
				return nil
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		if createEndpoints {
 | 
						|
			e.eventRecorder.Eventf(newEndpoints, v1.EventTypeWarning, "FailedToCreateEndpoint", "Failed to create endpoint for service %v/%v: %v", service.Namespace, service.Name, err)
 | 
						|
		} else {
 | 
						|
			e.eventRecorder.Eventf(newEndpoints, v1.EventTypeWarning, "FailedToUpdateEndpoint", "Failed to update endpoint %v/%v: %v", service.Namespace, service.Name, err)
 | 
						|
		}
 | 
						|
 | 
						|
		return err
 | 
						|
	}
 | 
						|
	return nil
 | 
						|
}
 | 
						|
 | 
						|
// checkLeftoverEndpoints lists all currently existing endpoints and adds their
 | 
						|
// service to the queue. This will detect endpoints that exist with no
 | 
						|
// corresponding service; these endpoints need to be deleted. We only need to
 | 
						|
// do this once on startup, because in steady-state these are detected (but
 | 
						|
// some stragglers could have been left behind if the endpoint controller
 | 
						|
// reboots).
 | 
						|
func (e *Controller) checkLeftoverEndpoints() {
 | 
						|
	list, err := e.endpointsLister.List(labels.Everything())
 | 
						|
	if err != nil {
 | 
						|
		utilruntime.HandleError(fmt.Errorf("Unable to list endpoints (%v); orphaned endpoints will not be cleaned up. (They're pretty harmless, but you can restart this component if you want another attempt made.)", err))
 | 
						|
		return
 | 
						|
	}
 | 
						|
	for _, ep := range list {
 | 
						|
		if _, ok := ep.Annotations[resourcelock.LeaderElectionRecordAnnotationKey]; ok {
 | 
						|
			// when there are multiple controller-manager instances,
 | 
						|
			// we observe that it will delete leader-election endpoints after 5min
 | 
						|
			// and cause re-election
 | 
						|
			// so skip the delete here
 | 
						|
			// as leader-election only have endpoints without service
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		key, err := controller.KeyFunc(ep)
 | 
						|
		if err != nil {
 | 
						|
			utilruntime.HandleError(fmt.Errorf("Unable to get key for endpoint %#v", ep))
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		e.queue.Add(key)
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func addEndpointSubset(subsets []v1.EndpointSubset, pod *v1.Pod, epa v1.EndpointAddress,
 | 
						|
	epp *v1.EndpointPort, tolerateUnreadyEndpoints bool) ([]v1.EndpointSubset, int, int) {
 | 
						|
	var readyEps int
 | 
						|
	var notReadyEps int
 | 
						|
	ports := []v1.EndpointPort{}
 | 
						|
	if epp != nil {
 | 
						|
		ports = append(ports, *epp)
 | 
						|
	}
 | 
						|
	if tolerateUnreadyEndpoints || podutil.IsPodReady(pod) {
 | 
						|
		subsets = append(subsets, v1.EndpointSubset{
 | 
						|
			Addresses: []v1.EndpointAddress{epa},
 | 
						|
			Ports:     ports,
 | 
						|
		})
 | 
						|
		readyEps++
 | 
						|
	} else if shouldPodBeInEndpoints(pod) {
 | 
						|
		klog.V(5).Infof("Pod is out of service: %s/%s", pod.Namespace, pod.Name)
 | 
						|
		subsets = append(subsets, v1.EndpointSubset{
 | 
						|
			NotReadyAddresses: []v1.EndpointAddress{epa},
 | 
						|
			Ports:             ports,
 | 
						|
		})
 | 
						|
		notReadyEps++
 | 
						|
	}
 | 
						|
	return subsets, readyEps, notReadyEps
 | 
						|
}
 | 
						|
 | 
						|
func shouldPodBeInEndpoints(pod *v1.Pod) bool {
 | 
						|
	switch pod.Spec.RestartPolicy {
 | 
						|
	case v1.RestartPolicyNever:
 | 
						|
		return pod.Status.Phase != v1.PodFailed && pod.Status.Phase != v1.PodSucceeded
 | 
						|
	case v1.RestartPolicyOnFailure:
 | 
						|
		return pod.Status.Phase != v1.PodSucceeded
 | 
						|
	default:
 | 
						|
		return true
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func endpointPortFromServicePort(servicePort *v1.ServicePort, portNum int) *v1.EndpointPort {
 | 
						|
	epp := &v1.EndpointPort{
 | 
						|
		Name:        servicePort.Name,
 | 
						|
		Port:        int32(portNum),
 | 
						|
		Protocol:    servicePort.Protocol,
 | 
						|
		AppProtocol: servicePort.AppProtocol,
 | 
						|
	}
 | 
						|
	return epp
 | 
						|
}
 | 
						|
 | 
						|
// capacityAnnotationSetCorrectly returns false if number of endpoints is greater than maxCapacity or
 | 
						|
// returns true if underCapacity and the annotation is not set.
 | 
						|
func capacityAnnotationSetCorrectly(annotations map[string]string, subsets []v1.EndpointSubset) bool {
 | 
						|
	numEndpoints := 0
 | 
						|
	for _, subset := range subsets {
 | 
						|
		numEndpoints += len(subset.Addresses) + len(subset.NotReadyAddresses)
 | 
						|
	}
 | 
						|
	if numEndpoints > maxCapacity {
 | 
						|
		// If subsets are over capacity, they must be truncated so consider
 | 
						|
		// the annotation as not set correctly
 | 
						|
		return false
 | 
						|
	}
 | 
						|
	_, ok := annotations[v1.EndpointsOverCapacity]
 | 
						|
	return !ok
 | 
						|
}
 | 
						|
 | 
						|
// truncateEndpoints by best effort will distribute the endpoints over the subsets based on the proportion
 | 
						|
// of endpoints per subset and will prioritize Ready Endpoints over NotReady Endpoints.
 | 
						|
func truncateEndpoints(endpoints *v1.Endpoints) bool {
 | 
						|
	totalReady := 0
 | 
						|
	totalNotReady := 0
 | 
						|
	for _, subset := range endpoints.Subsets {
 | 
						|
		totalReady += len(subset.Addresses)
 | 
						|
		totalNotReady += len(subset.NotReadyAddresses)
 | 
						|
	}
 | 
						|
 | 
						|
	if totalReady+totalNotReady <= maxCapacity {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
 | 
						|
	truncateReady := false
 | 
						|
	max := maxCapacity - totalReady
 | 
						|
	numTotal := totalNotReady
 | 
						|
	if totalReady > maxCapacity {
 | 
						|
		truncateReady = true
 | 
						|
		max = maxCapacity
 | 
						|
		numTotal = totalReady
 | 
						|
	}
 | 
						|
	canBeAdded := max
 | 
						|
 | 
						|
	for i := range endpoints.Subsets {
 | 
						|
		subset := endpoints.Subsets[i]
 | 
						|
		numInSubset := len(subset.Addresses)
 | 
						|
		if !truncateReady {
 | 
						|
			numInSubset = len(subset.NotReadyAddresses)
 | 
						|
		}
 | 
						|
 | 
						|
		// The number of endpoints per subset will be based on the propotion of endpoints
 | 
						|
		// in this subset versus the total number of endpoints. The proportion of endpoints
 | 
						|
		// will be rounded up which most likely will lead to the last subset having less
 | 
						|
		// endpoints than the expected proportion.
 | 
						|
		toBeAdded := int(math.Ceil((float64(numInSubset) / float64(numTotal)) * float64(max)))
 | 
						|
		// If there is not enough endpoints for the last subset, ensure only the number up
 | 
						|
		// to the capacity are added
 | 
						|
		if toBeAdded > canBeAdded {
 | 
						|
			toBeAdded = canBeAdded
 | 
						|
		}
 | 
						|
 | 
						|
		if truncateReady {
 | 
						|
			// Truncate ready Addresses to allocated proportion and truncate all not ready
 | 
						|
			// addresses
 | 
						|
			subset.Addresses = addressSubset(subset.Addresses, toBeAdded)
 | 
						|
			subset.NotReadyAddresses = []v1.EndpointAddress{}
 | 
						|
			canBeAdded -= len(subset.Addresses)
 | 
						|
		} else {
 | 
						|
			// Only truncate the not ready addresses
 | 
						|
			subset.NotReadyAddresses = addressSubset(subset.NotReadyAddresses, toBeAdded)
 | 
						|
			canBeAdded -= len(subset.NotReadyAddresses)
 | 
						|
		}
 | 
						|
		endpoints.Subsets[i] = subset
 | 
						|
	}
 | 
						|
	return true
 | 
						|
}
 | 
						|
 | 
						|
// addressSubset takes a list of addresses and returns a subset if the length is greater
 | 
						|
// than the maxNum. If less than the maxNum, the entire list is returned.
 | 
						|
func addressSubset(addresses []v1.EndpointAddress, maxNum int) []v1.EndpointAddress {
 | 
						|
	if len(addresses) <= maxNum {
 | 
						|
		return addresses
 | 
						|
	}
 | 
						|
	return addresses[0:maxNum]
 | 
						|
}
 |