Mirror of https://github.com/optim-enterprises-bv/kubernetes.git (synced 2025-10-31 02:08:13 +00:00)
	Adds support for PodCIDR allocation from the GCE cloud provider
If CIDRAllocatorType is set to `CloudCIDRAllocator`, CIDR allocation is instead performed by the external cloud provider, and the node controller is only responsible for reflecting the allocation into the node spec.

- Splits the rangeAllocator off from the cidr_allocator.go file.
- Adds cloudCIDRAllocator, which is used when the cloud provider allocates the CIDR ranges externally (GCE support only).
- Updates the RBAC permissions for the node controller to include PATCH.
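To make the selection mechanism easier to follow, here is a minimal, self-contained Go sketch of the pattern the diff below introduces: a typed CIDRAllocatorType string chooses between an in-controller range allocator and a cloud-delegated allocator, both behind the shared CIDRAllocator interface. The Node struct, the node name, and the CIDR values here are illustrative stand-ins, not the real *v1.Node or the GCE alias-range API.

```go
// A minimal sketch of the allocator-selection pattern this commit introduces.
// Simplified and self-contained: Node stands in for *v1.Node, and the CIDR
// values and node name are made up for illustration.
package main

import (
	"errors"
	"fmt"
)

// Node is an illustrative stand-in for the Kubernetes *v1.Node object.
type Node struct {
	Name    string
	PodCIDR string
}

// CIDRAllocator mirrors the interface in cidr_allocator.go: something that can
// assign a pod CIDR to a node, or record one the node already has.
type CIDRAllocator interface {
	AllocateOrOccupyCIDR(node *Node) error
	ReleaseCIDR(node *Node) error
}

// CIDRAllocatorType mirrors the typed string used to pick an implementation.
type CIDRAllocatorType string

const (
	RangeAllocatorType CIDRAllocatorType = "RangeAllocator"
	CloudAllocatorType CIDRAllocatorType = "CloudAllocator"
)

// rangeAllocator stands in for the in-controller allocator that hands out
// CIDRs from a locally managed pool.
type rangeAllocator struct{ next int }

func (r *rangeAllocator) AllocateOrOccupyCIDR(node *Node) error {
	if node.PodCIDR != "" {
		return nil // already assigned; the real code just occupies it in its map
	}
	node.PodCIDR = fmt.Sprintf("10.0.%d.0/24", r.next)
	r.next++
	return nil
}

func (r *rangeAllocator) ReleaseCIDR(node *Node) error { return nil }

// cloudAllocator stands in for cloudCIDRAllocator: the provider owns the
// ranges, and the controller only copies the assignment into the node spec.
type cloudAllocator struct{ aliasRanges map[string][]string }

func (c *cloudAllocator) AllocateOrOccupyCIDR(node *Node) error {
	cidrs := c.aliasRanges[node.Name]
	if len(cidrs) == 0 {
		return errors.New("failed to allocate cidr (none exist)")
	}
	node.PodCIDR = cidrs[0]
	return nil
}

// ReleaseCIDR is a no-op here: deallocation is delegated to the provider.
func (c *cloudAllocator) ReleaseCIDR(node *Node) error { return nil }

// newCIDRAllocator mirrors the switch added to NewNodeController.
func newCIDRAllocator(t CIDRAllocatorType) (CIDRAllocator, error) {
	switch t {
	case RangeAllocatorType:
		return &rangeAllocator{}, nil
	case CloudAllocatorType:
		return &cloudAllocator{aliasRanges: map[string][]string{"node-a": {"10.128.0.0/24"}}}, nil
	default:
		return nil, fmt.Errorf("invalid CIDR allocator type: %v", t)
	}
}

func main() {
	alloc, err := newCIDRAllocator(CloudAllocatorType)
	if err != nil {
		panic(err)
	}
	n := &Node{Name: "node-a"}
	if err := alloc.AllocateOrOccupyCIDR(n); err != nil {
		panic(err)
	}
	fmt.Println(n.Name, "->", n.PodCIDR) // node-a -> 10.128.0.0/24
}
```

The real constructor switch lives in NewNodeController; the cloud path additionally reads alias ranges via gce.GCECloud.AliasRanges and patches node status, as shown in the hunks below.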
		| @@ -477,6 +477,7 @@ func StartControllers(controllers map[string]InitFunc, s *options.CMServer, root | ||||
| 			serviceCIDR, | ||||
| 			int(s.NodeCIDRMaskSize), | ||||
| 			s.AllocateNodeCIDRs, | ||||
| 			nodecontroller.CIDRAllocatorType(s.CIDRAllocatorType), | ||||
| 			s.EnableTaintManager, | ||||
| 			utilfeature.DefaultFeatureGate.Enabled(features.TaintBasedEvictions), | ||||
| 		) | ||||
|   | ||||
| @@ -188,7 +188,10 @@ func (s *CMServer) AddFlags(fs *pflag.FlagSet, allControllers []string, disabled | ||||
| 	fs.StringVar(&s.ClusterCIDR, "cluster-cidr", s.ClusterCIDR, "CIDR Range for Pods in cluster.") | ||||
| 	fs.StringVar(&s.ServiceCIDR, "service-cluster-ip-range", s.ServiceCIDR, "CIDR Range for Services in cluster.") | ||||
| 	fs.Int32Var(&s.NodeCIDRMaskSize, "node-cidr-mask-size", s.NodeCIDRMaskSize, "Mask size for node cidr in cluster.") | ||||
| 	fs.BoolVar(&s.AllocateNodeCIDRs, "allocate-node-cidrs", false, "Should CIDRs for Pods be allocated and set on the cloud provider.") | ||||
| 	fs.BoolVar(&s.AllocateNodeCIDRs, "allocate-node-cidrs", false, | ||||
| 		"Should CIDRs for Pods be allocated and set on the cloud provider.") | ||||
| 	fs.StringVar(&s.CIDRAllocatorType, "cidr-allocator-type", "RangeAllocator", | ||||
| 		"Type of CIDR allocator to use") | ||||
| 	fs.BoolVar(&s.ConfigureCloudRoutes, "configure-cloud-routes", true, "Should CIDRs allocated by allocate-node-cidrs be configured on the cloud provider.") | ||||
| 	fs.StringVar(&s.Master, "master", s.Master, "The address of the Kubernetes API server (overrides any value in kubeconfig)") | ||||
| 	fs.StringVar(&s.Kubeconfig, "kubeconfig", s.Kubeconfig, "Path to kubeconfig file with authorization and master location information.") | ||||
|   | ||||
| @@ -794,9 +794,11 @@ type KubeControllerManagerConfiguration struct { | ||||
| 	ServiceCIDR string | ||||
| 	// NodeCIDRMaskSize is the mask size for node cidr in cluster. | ||||
| 	NodeCIDRMaskSize int32 | ||||
| 	// allocateNodeCIDRs enables CIDRs for Pods to be allocated and, if | ||||
| 	// AllocateNodeCIDRs enables CIDRs for Pods to be allocated and, if | ||||
| 	// ConfigureCloudRoutes is true, to be set on the cloud provider. | ||||
| 	AllocateNodeCIDRs bool | ||||
| 	// CIDRAllocatorType determines what kind of pod CIDR allocator will be used. | ||||
| 	CIDRAllocatorType string | ||||
| 	// configureCloudRoutes enables CIDRs allocated with allocateNodeCIDRs | ||||
| 	// to be configured on the cloud provider. | ||||
| 	ConfigureCloudRoutes bool | ||||
|   | ||||
| @@ -217,10 +217,10 @@ func (gce *GCECloud) CurrentNodeName(hostname string) (types.NodeName, error) { | ||||
| 	return types.NodeName(hostname), nil | ||||
| } | ||||
|  | ||||
| // PodCIDRs returns a list of CIDR ranges that are assigned to the | ||||
| // AliasRanges returns a list of CIDR ranges that are assigned to the | ||||
| // `node` for allocation to pods. Returns a list of the form | ||||
| // "<ip>/<netmask>". | ||||
| func (gce *GCECloud) PodCIDRs(nodeName types.NodeName) (cidrs []string, err error) { | ||||
| func (gce *GCECloud) AliasRanges(nodeName types.NodeName) (cidrs []string, err error) { | ||||
| 	var instance *gceInstance | ||||
| 	instance, err = gce.getInstanceByName(mapNodeNameToInstanceName(nodeName)) | ||||
| 	if err != nil { | ||||
|   | ||||
| @@ -8,56 +8,6 @@ load( | ||||
|     "go_test", | ||||
| ) | ||||
|  | ||||
| go_library( | ||||
|     name = "go_default_library", | ||||
|     srcs = [ | ||||
|         "cidr_allocator.go", | ||||
|         "cidr_set.go", | ||||
|         "controller_utils.go", | ||||
|         "doc.go", | ||||
|         "metrics.go", | ||||
|         "nodecontroller.go", | ||||
|         "rate_limited_queue.go", | ||||
|         "taint_controller.go", | ||||
|         "timed_workers.go", | ||||
|     ], | ||||
|     tags = ["automanaged"], | ||||
|     deps = [ | ||||
|         "//pkg/api:go_default_library", | ||||
|         "//pkg/api/v1:go_default_library", | ||||
|         "//pkg/client/clientset_generated/clientset:go_default_library", | ||||
|         "//pkg/client/informers/informers_generated/externalversions/core/v1:go_default_library", | ||||
|         "//pkg/client/informers/informers_generated/externalversions/extensions/v1beta1:go_default_library", | ||||
|         "//pkg/client/listers/core/v1:go_default_library", | ||||
|         "//pkg/client/listers/extensions/v1beta1:go_default_library", | ||||
|         "//pkg/cloudprovider:go_default_library", | ||||
|         "//pkg/controller:go_default_library", | ||||
|         "//pkg/kubelet/util/format:go_default_library", | ||||
|         "//pkg/util/metrics:go_default_library", | ||||
|         "//pkg/util/node:go_default_library", | ||||
|         "//pkg/util/system:go_default_library", | ||||
|         "//pkg/util/version:go_default_library", | ||||
|         "//vendor:github.com/golang/glog", | ||||
|         "//vendor:github.com/prometheus/client_golang/prometheus", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/api/equality", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/api/errors", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/apis/meta/v1", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/fields", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/labels", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/types", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/util/errors", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/util/runtime", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/util/sets", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/util/wait", | ||||
|         "//vendor:k8s.io/client-go/kubernetes/typed/core/v1", | ||||
|         "//vendor:k8s.io/client-go/pkg/api/v1", | ||||
|         "//vendor:k8s.io/client-go/tools/cache", | ||||
|         "//vendor:k8s.io/client-go/tools/record", | ||||
|         "//vendor:k8s.io/client-go/util/flowcontrol", | ||||
|         "//vendor:k8s.io/client-go/util/workqueue", | ||||
|     ], | ||||
| ) | ||||
|  | ||||
| go_test( | ||||
|     name = "go_default_test", | ||||
|     srcs = [ | ||||
| @@ -96,6 +46,59 @@ go_test( | ||||
|     ], | ||||
| ) | ||||
|  | ||||
| go_library( | ||||
|     name = "go_default_library", | ||||
|     srcs = [ | ||||
|         "cidr_allocator.go", | ||||
|         "cidr_set.go", | ||||
|         "cloud_cidr_allocator.go", | ||||
|         "controller_utils.go", | ||||
|         "doc.go", | ||||
|         "metrics.go", | ||||
|         "nodecontroller.go", | ||||
|         "range_allocator.go", | ||||
|         "rate_limited_queue.go", | ||||
|         "taint_controller.go", | ||||
|         "timed_workers.go", | ||||
|     ], | ||||
|     tags = ["automanaged"], | ||||
|     deps = [ | ||||
|         "//pkg/api:go_default_library", | ||||
|         "//pkg/api/v1:go_default_library", | ||||
|         "//pkg/client/clientset_generated/clientset:go_default_library", | ||||
|         "//pkg/client/informers/informers_generated/externalversions/core/v1:go_default_library", | ||||
|         "//pkg/client/informers/informers_generated/externalversions/extensions/v1beta1:go_default_library", | ||||
|         "//pkg/client/listers/core/v1:go_default_library", | ||||
|         "//pkg/client/listers/extensions/v1beta1:go_default_library", | ||||
|         "//pkg/cloudprovider:go_default_library", | ||||
|         "//pkg/cloudprovider/providers/gce:go_default_library", | ||||
|         "//pkg/controller:go_default_library", | ||||
|         "//pkg/kubelet/util/format:go_default_library", | ||||
|         "//pkg/util/metrics:go_default_library", | ||||
|         "//pkg/util/node:go_default_library", | ||||
|         "//pkg/util/system:go_default_library", | ||||
|         "//pkg/util/version:go_default_library", | ||||
|         "//vendor:github.com/golang/glog", | ||||
|         "//vendor:github.com/prometheus/client_golang/prometheus", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/api/equality", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/api/errors", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/apis/meta/v1", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/fields", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/labels", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/types", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/util/errors", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/util/runtime", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/util/sets", | ||||
|         "//vendor:k8s.io/apimachinery/pkg/util/wait", | ||||
|         "//vendor:k8s.io/client-go/kubernetes/typed/core/v1", | ||||
|         "//vendor:k8s.io/client-go/pkg/api/v1", | ||||
|         "//vendor:k8s.io/client-go/tools/cache", | ||||
|         "//vendor:k8s.io/client-go/tools/record", | ||||
|         "//vendor:k8s.io/client-go/util/flowcontrol", | ||||
|         "//vendor:k8s.io/client-go/util/workqueue", | ||||
|     ], | ||||
| ) | ||||
|  | ||||
| filegroup( | ||||
|     name = "package-srcs", | ||||
|     srcs = glob(["**"]), | ||||
|   | ||||
| @@ -18,259 +18,34 @@ package node | ||||
|  | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"net" | ||||
| 	"sync" | ||||
|  | ||||
| 	apierrors "k8s.io/apimachinery/pkg/api/errors" | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/apimachinery/pkg/util/sets" | ||||
| 	"k8s.io/apimachinery/pkg/util/wait" | ||||
| 	v1core "k8s.io/client-go/kubernetes/typed/core/v1" | ||||
| 	clientv1 "k8s.io/client-go/pkg/api/v1" | ||||
| 	"k8s.io/client-go/tools/record" | ||||
| 	"k8s.io/kubernetes/pkg/api" | ||||
| 	"k8s.io/kubernetes/pkg/api/v1" | ||||
| 	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset" | ||||
|  | ||||
| 	"github.com/golang/glog" | ||||
| 	v1 "k8s.io/kubernetes/pkg/api/v1" | ||||
| ) | ||||
|  | ||||
| // TODO: figure out the good setting for those constants. | ||||
| const ( | ||||
| 	// controls how many NodeSpec updates NC can process concurrently. | ||||
| 	cidrUpdateWorkers   = 10 | ||||
| 	cidrUpdateQueueSize = 5000 | ||||
| 	// podCIDRUpdateRetry controls the number of retries of writing Node.Spec.PodCIDR update. | ||||
| 	podCIDRUpdateRetry = 5 | ||||
| ) | ||||
|  | ||||
| var errCIDRRangeNoCIDRsRemaining = errors.New("CIDR allocation failed; there are no remaining CIDRs left to allocate in the accepted range") | ||||
| var errCIDRRangeNoCIDRsRemaining = errors.New( | ||||
| 	"CIDR allocation failed; there are no remaining CIDRs left to allocate in the accepted range") | ||||
|  | ||||
| type nodeAndCIDR struct { | ||||
| 	cidr     *net.IPNet | ||||
| 	nodeName string | ||||
| } | ||||
|  | ||||
| // CIDRAllocator is an interface implemented by things that know how to allocate/occupy/recycle CIDR for nodes. | ||||
| // CIDRAllocatorType is the type of the allocator to use. | ||||
| type CIDRAllocatorType string | ||||
|  | ||||
| const ( | ||||
| 	RangeAllocatorType CIDRAllocatorType = "RangeAllocator" | ||||
| 	CloudAllocatorType CIDRAllocatorType = "CloudAllocator" | ||||
| ) | ||||
|  | ||||
| // CIDRAllocator is an interface implemented by things that know how to | ||||
| // allocate/occupy/recycle CIDR for nodes. | ||||
| type CIDRAllocator interface { | ||||
| 	// AllocateOrOccupyCIDR looks at the given node, assigns it a valid | ||||
| 	// CIDR if it doesn't currently have one, or marks the CIDR as used if | ||||
| 	// the node already has one. | ||||
| 	AllocateOrOccupyCIDR(node *v1.Node) error | ||||
| 	// ReleaseCIDR releases the CIDR of the removed node | ||||
| 	ReleaseCIDR(node *v1.Node) error | ||||
| } | ||||
|  | ||||
| type rangeAllocator struct { | ||||
| 	client      clientset.Interface | ||||
| 	cidrs       *cidrSet | ||||
| 	clusterCIDR *net.IPNet | ||||
| 	maxCIDRs    int | ||||
| 	// Channel that is used to pass updating Nodes with assigned CIDRs to the background | ||||
| 	// This increases a throughput of CIDR assignment by not blocking on long operations. | ||||
| 	nodeCIDRUpdateChannel chan nodeAndCIDR | ||||
| 	recorder              record.EventRecorder | ||||
| 	// Keep a set of nodes that are currently being processed to avoid races in CIDR allocation | ||||
| 	sync.Mutex | ||||
| 	nodesInProcessing sets.String | ||||
| } | ||||
|  | ||||
| // NewCIDRRangeAllocator returns a CIDRAllocator to allocate CIDR for node | ||||
| // Caller must ensure subNetMaskSize is not less than cluster CIDR mask size. | ||||
| // Caller must always pass in a list of existing nodes so the new allocator | ||||
| // can initialize its CIDR map. NodeList is only nil in testing. | ||||
| func NewCIDRRangeAllocator(client clientset.Interface, clusterCIDR *net.IPNet, serviceCIDR *net.IPNet, subNetMaskSize int, nodeList *v1.NodeList) (CIDRAllocator, error) { | ||||
| 	eventBroadcaster := record.NewBroadcaster() | ||||
| 	recorder := eventBroadcaster.NewRecorder(api.Scheme, clientv1.EventSource{Component: "cidrAllocator"}) | ||||
| 	eventBroadcaster.StartLogging(glog.Infof) | ||||
| 	if client != nil { | ||||
| 		glog.V(0).Infof("Sending events to api server.") | ||||
| 		eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(client.Core().RESTClient()).Events("")}) | ||||
| 	} else { | ||||
| 		glog.Fatalf("kubeClient is nil when starting NodeController") | ||||
| 	} | ||||
|  | ||||
| 	ra := &rangeAllocator{ | ||||
| 		client:                client, | ||||
| 		cidrs:                 newCIDRSet(clusterCIDR, subNetMaskSize), | ||||
| 		clusterCIDR:           clusterCIDR, | ||||
| 		nodeCIDRUpdateChannel: make(chan nodeAndCIDR, cidrUpdateQueueSize), | ||||
| 		recorder:              recorder, | ||||
| 		nodesInProcessing:     sets.NewString(), | ||||
| 	} | ||||
|  | ||||
| 	if serviceCIDR != nil { | ||||
| 		ra.filterOutServiceRange(serviceCIDR) | ||||
| 	} else { | ||||
| 		glog.V(0).Info("No Service CIDR provided. Skipping filtering out service addresses.") | ||||
| 	} | ||||
|  | ||||
| 	if nodeList != nil { | ||||
| 		for _, node := range nodeList.Items { | ||||
| 			if node.Spec.PodCIDR == "" { | ||||
| 				glog.Infof("Node %v has no CIDR, ignoring", node.Name) | ||||
| 				continue | ||||
| 			} else { | ||||
| 				glog.Infof("Node %v has CIDR %s, occupying it in CIDR map", node.Name, node.Spec.PodCIDR) | ||||
| 			} | ||||
| 			if err := ra.occupyCIDR(&node); err != nil { | ||||
| 				// This will happen if: | ||||
| 				// 1. We find garbage in the podCIDR field. Retrying is useless. | ||||
| 				// 2. CIDR out of range: This means a node CIDR has changed. | ||||
| 				// This error will keep crashing controller-manager. | ||||
| 				return nil, err | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	for i := 0; i < cidrUpdateWorkers; i++ { | ||||
| 		go func(stopChan <-chan struct{}) { | ||||
| 			for { | ||||
| 				select { | ||||
| 				case workItem, ok := <-ra.nodeCIDRUpdateChannel: | ||||
| 					if !ok { | ||||
| 						glog.Warning("NodeCIDRUpdateChannel read returned false.") | ||||
| 						return | ||||
| 					} | ||||
| 					ra.updateCIDRAllocation(workItem) | ||||
| 				case <-stopChan: | ||||
| 					return | ||||
| 				} | ||||
| 			} | ||||
| 		}(wait.NeverStop) | ||||
| 	} | ||||
|  | ||||
| 	return ra, nil | ||||
| } | ||||
|  | ||||
| func (r *rangeAllocator) insertNodeToProcessing(nodeName string) bool { | ||||
| 	r.Lock() | ||||
| 	defer r.Unlock() | ||||
| 	if r.nodesInProcessing.Has(nodeName) { | ||||
| 		return false | ||||
| 	} | ||||
| 	r.nodesInProcessing.Insert(nodeName) | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| func (r *rangeAllocator) removeNodeFromProcessing(nodeName string) { | ||||
| 	r.Lock() | ||||
| 	defer r.Unlock() | ||||
| 	r.nodesInProcessing.Delete(nodeName) | ||||
| } | ||||
|  | ||||
| func (r *rangeAllocator) occupyCIDR(node *v1.Node) error { | ||||
| 	defer r.removeNodeFromProcessing(node.Name) | ||||
| 	if node.Spec.PodCIDR == "" { | ||||
| 		return nil | ||||
| 	} | ||||
| 	_, podCIDR, err := net.ParseCIDR(node.Spec.PodCIDR) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("failed to parse node %s, CIDR %s", node.Name, node.Spec.PodCIDR) | ||||
| 	} | ||||
| 	if err := r.cidrs.occupy(podCIDR); err != nil { | ||||
| 		return fmt.Errorf("failed to mark cidr as occupied: %v", err) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // AllocateOrOccupyCIDR looks at the given node, assigns it a valid CIDR | ||||
| // if it doesn't currently have one or marks the CIDR as used if the node already has one. | ||||
| // WARNING: If you're adding any return calls or defer any more work from this function | ||||
| // you have to handle correctly nodesInProcessing. | ||||
| func (r *rangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error { | ||||
| 	if node == nil { | ||||
| 		return nil | ||||
| 	} | ||||
| 	if !r.insertNodeToProcessing(node.Name) { | ||||
| 		glog.V(2).Infof("Node %v is already in a process of CIDR assignment.", node.Name) | ||||
| 		return nil | ||||
| 	} | ||||
| 	if node.Spec.PodCIDR != "" { | ||||
| 		return r.occupyCIDR(node) | ||||
| 	} | ||||
| 	podCIDR, err := r.cidrs.allocateNext() | ||||
| 	if err != nil { | ||||
| 		r.removeNodeFromProcessing(node.Name) | ||||
| 		recordNodeStatusChange(r.recorder, node, "CIDRNotAvailable") | ||||
| 		return fmt.Errorf("failed to allocate cidr: %v", err) | ||||
| 	} | ||||
|  | ||||
| 	glog.V(10).Infof("Putting node %s with CIDR %s into the work queue", node.Name, podCIDR) | ||||
| 	r.nodeCIDRUpdateChannel <- nodeAndCIDR{ | ||||
| 		nodeName: node.Name, | ||||
| 		cidr:     podCIDR, | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // ReleaseCIDR releases the CIDR of the removed node | ||||
| func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error { | ||||
| 	if node == nil || node.Spec.PodCIDR == "" { | ||||
| 		return nil | ||||
| 	} | ||||
| 	_, podCIDR, err := net.ParseCIDR(node.Spec.PodCIDR) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("Failed to parse CIDR %s on Node %v: %v", node.Spec.PodCIDR, node.Name, err) | ||||
| 	} | ||||
|  | ||||
| 	glog.V(4).Infof("release CIDR %s", node.Spec.PodCIDR) | ||||
| 	if err = r.cidrs.release(podCIDR); err != nil { | ||||
| 		return fmt.Errorf("Error when releasing CIDR %v: %v", node.Spec.PodCIDR, err) | ||||
| 	} | ||||
| 	return err | ||||
| } | ||||
|  | ||||
| // Marks all CIDRs with subNetMaskSize that belongs to serviceCIDR as used, | ||||
| // so that they won't be assignable. | ||||
| func (r *rangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) { | ||||
| 	// Checks if service CIDR has a nonempty intersection with cluster CIDR. It is the case if either | ||||
| 	// clusterCIDR contains serviceCIDR with clusterCIDR's Mask applied (this means that clusterCIDR contains serviceCIDR) | ||||
| 	// or vice versa (which means that serviceCIDR contains clusterCIDR). | ||||
| 	if !r.clusterCIDR.Contains(serviceCIDR.IP.Mask(r.clusterCIDR.Mask)) && !serviceCIDR.Contains(r.clusterCIDR.IP.Mask(serviceCIDR.Mask)) { | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	if err := r.cidrs.occupy(serviceCIDR); err != nil { | ||||
| 		glog.Errorf("Error filtering out service cidr %v: %v", serviceCIDR, err) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Assigns CIDR to Node and sends an update to the API server. | ||||
| func (r *rangeAllocator) updateCIDRAllocation(data nodeAndCIDR) error { | ||||
| 	var err error | ||||
| 	var node *v1.Node | ||||
| 	defer r.removeNodeFromProcessing(data.nodeName) | ||||
| 	for rep := 0; rep < podCIDRUpdateRetry; rep++ { | ||||
| 		// TODO: change it to using PATCH instead of full Node updates. | ||||
| 		node, err = r.client.Core().Nodes().Get(data.nodeName, metav1.GetOptions{}) | ||||
| 		if err != nil { | ||||
| 			glog.Errorf("Failed while getting node %v to retry updating Node.Spec.PodCIDR: %v", data.nodeName, err) | ||||
| 			continue | ||||
| 		} | ||||
| 		if node.Spec.PodCIDR != "" { | ||||
| 			glog.Errorf("Node %v already has allocated CIDR %v. Releasing assigned one if different.", node.Name, node.Spec.PodCIDR) | ||||
| 			if node.Spec.PodCIDR != data.cidr.String() { | ||||
| 				if err := r.cidrs.release(data.cidr); err != nil { | ||||
| 					glog.Errorf("Error when releasing CIDR %v", data.cidr.String()) | ||||
| 				} | ||||
| 			} | ||||
| 			return nil | ||||
| 		} | ||||
| 		node.Spec.PodCIDR = data.cidr.String() | ||||
| 		if _, err := r.client.Core().Nodes().Update(node); err != nil { | ||||
| 			glog.Errorf("Failed while updating Node.Spec.PodCIDR (%d retries left): %v", podCIDRUpdateRetry-rep-1, err) | ||||
| 		} else { | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	if err != nil { | ||||
| 		recordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed") | ||||
| 		// We accept the fact that we may leak CIDRs here. This is safer than releasing | ||||
| 		// them in case when we don't know if request went through. | ||||
| 		// NodeController restart will return all falsely allocated CIDRs to the pool. | ||||
| 		if !apierrors.IsServerTimeout(err) { | ||||
| 			glog.Errorf("CIDR assignment for node %v failed: %v. Releasing allocated CIDR", data.nodeName, err) | ||||
| 			if releaseErr := r.cidrs.release(data.cidr); releaseErr != nil { | ||||
| 				glog.Errorf("Error releasing allocated CIDR for node %v: %v", data.nodeName, releaseErr) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return err | ||||
| } | ||||
|   | ||||
							
								
								
									
pkg/controller/node/cloud_cidr_allocator.go (new file, 143 lines added)
							| @@ -0,0 +1,143 @@ | ||||
| /* | ||||
| Copyright 2016 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package node | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"sync" | ||||
|  | ||||
| 	"github.com/golang/glog" | ||||
|  | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/apimachinery/pkg/types" | ||||
|  | ||||
| 	clientv1 "k8s.io/client-go/pkg/api/v1" | ||||
| 	"k8s.io/client-go/tools/record" | ||||
|  | ||||
| 	"k8s.io/kubernetes/pkg/api" | ||||
| 	"k8s.io/kubernetes/pkg/api/v1" | ||||
| 	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset" | ||||
| 	"k8s.io/kubernetes/pkg/cloudprovider" | ||||
| 	"k8s.io/kubernetes/pkg/cloudprovider/providers/gce" | ||||
| 	nodeutil "k8s.io/kubernetes/pkg/util/node" | ||||
| ) | ||||
|  | ||||
| // cloudCIDRAllocator allocates node CIDRs according to IP address aliases | ||||
| // assigned by the cloud provider. In this case, the allocation and | ||||
| // deallocation is delegated to the external provider, and the controller | ||||
| // merely takes the assignment and updates the node spec. | ||||
| type cloudCIDRAllocator struct { | ||||
| 	lock sync.Mutex | ||||
|  | ||||
| 	client clientset.Interface | ||||
| 	cloud  *gce.GCECloud | ||||
|  | ||||
| 	recorder record.EventRecorder | ||||
| } | ||||
|  | ||||
| var _ CIDRAllocator = (*cloudCIDRAllocator)(nil) | ||||
|  | ||||
| func NewCloudCIDRAllocator( | ||||
| 	client clientset.Interface, | ||||
| 	cloud cloudprovider.Interface) (ca CIDRAllocator, err error) { | ||||
|  | ||||
| 	gceCloud, ok := cloud.(*gce.GCECloud) | ||||
| 	if !ok { | ||||
| 		err = fmt.Errorf("cloudCIDRAllocator does not support %v provider", cloud.ProviderName()) | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	ca = &cloudCIDRAllocator{ | ||||
| 		client: client, | ||||
| 		cloud:  gceCloud, | ||||
| 		recorder: record.NewBroadcaster().NewRecorder( | ||||
| 			api.Scheme, | ||||
| 			clientv1.EventSource{Component: "cidrAllocator"}), | ||||
| 	} | ||||
|  | ||||
| 	glog.V(0).Infof("Using cloud CIDR allocator (provider: %v)", cloud.ProviderName()) | ||||
|  | ||||
| 	return | ||||
| } | ||||
|  | ||||
| func (ca *cloudCIDRAllocator) AllocateOrOccupyCIDR(node *v1.Node) error { | ||||
| 	glog.V(2).Infof("Updating PodCIDR for node %v", node.Name) | ||||
|  | ||||
| 	cidrs, err := ca.cloud.AliasRanges(types.NodeName(node.Name)) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		recordNodeStatusChange(ca.recorder, node, "CIDRNotAvailable") | ||||
| 		return fmt.Errorf("failed to allocate cidr: %v", err) | ||||
| 	} | ||||
|  | ||||
| 	if len(cidrs) == 0 { | ||||
| 		recordNodeStatusChange(ca.recorder, node, "CIDRNotAvailable") | ||||
| 		glog.V(2).Infof("Node %v has no CIDRs", node.Name) | ||||
| 		return fmt.Errorf("failed to allocate cidr (none exist)") | ||||
| 	} | ||||
|  | ||||
| 	node, err = ca.client.Core().Nodes().Get(node.Name, metav1.GetOptions{}) | ||||
| 	if err != nil { | ||||
| 		glog.Errorf("Could not get Node object from Kubernetes: %v", err) | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	podCIDR := cidrs[0] | ||||
|  | ||||
| 	if node.Spec.PodCIDR != "" { | ||||
| 		if node.Spec.PodCIDR == podCIDR { | ||||
| 			glog.V(3).Infof("Node %v has PodCIDR %v", node.Name, podCIDR) | ||||
| 			return nil | ||||
| 		} | ||||
| 		glog.Errorf("PodCIDR cannot be reassigned, node %v spec has %v, but cloud provider has assigned %v", | ||||
| 			node.Name, node.Spec.PodCIDR, podCIDR) | ||||
| 		// We fall through and set the CIDR despite this error. This | ||||
| 		// implements the same logic as implemented in the | ||||
| 		// rangeAllocator. | ||||
| 		// | ||||
| 		// See https://github.com/kubernetes/kubernetes/pull/42147#discussion_r103357248 | ||||
| 	} | ||||
|  | ||||
| 	node.Spec.PodCIDR = cidrs[0] | ||||
| 	if _, err := ca.client.Core().Nodes().Update(node); err == nil { | ||||
| 		glog.V(2).Infof("Node %v PodCIDR set to %v", node.Name, podCIDR) | ||||
| 	} else { | ||||
| 		glog.Errorf("Could not update node %v PodCIDR to %v: %v", | ||||
| 			node.Name, podCIDR, err) | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	err = nodeutil.SetNodeCondition(ca.client, types.NodeName(node.Name), v1.NodeCondition{ | ||||
| 		Type:               v1.NodeNetworkUnavailable, | ||||
| 		Status:             v1.ConditionFalse, | ||||
| 		Reason:             "RouteCreated", | ||||
| 		Message:            "NodeController create implicit route", | ||||
| 		LastTransitionTime: metav1.Now(), | ||||
| 	}) | ||||
| 	if err != nil { | ||||
| 		glog.Errorf("Error setting route status for node %v: %v", | ||||
| 			node.Name, err) | ||||
| 	} | ||||
|  | ||||
| 	return err | ||||
| } | ||||
|  | ||||
| func (ca *cloudCIDRAllocator) ReleaseCIDR(node *v1.Node) error { | ||||
| 	glog.V(2).Infof("Node %v PodCIDR (%v) will be released by external cloud provider (not managed by controller)", | ||||
| 		node.Name, node.Spec.PodCIDR) | ||||
| 	return nil | ||||
| } | ||||
| @@ -109,11 +109,13 @@ type nodeStatusData struct { | ||||
|  | ||||
| type NodeController struct { | ||||
| 	allocateNodeCIDRs bool | ||||
| 	cloud             cloudprovider.Interface | ||||
| 	clusterCIDR       *net.IPNet | ||||
| 	serviceCIDR       *net.IPNet | ||||
| 	knownNodeSet      map[string]*v1.Node | ||||
| 	kubeClient        clientset.Interface | ||||
| 	allocatorType     CIDRAllocatorType | ||||
|  | ||||
| 	cloud        cloudprovider.Interface | ||||
| 	clusterCIDR  *net.IPNet | ||||
| 	serviceCIDR  *net.IPNet | ||||
| 	knownNodeSet map[string]*v1.Node | ||||
| 	kubeClient   clientset.Interface | ||||
| 	// Method for easy mocking in unittest. | ||||
| 	lookupIP func(host string) ([]net.IP, error) | ||||
| 	// Value used if sync_nodes_status=False. NodeController will not proactively | ||||
| @@ -162,9 +164,8 @@ type NodeController struct { | ||||
|  | ||||
| 	podInformerSynced cache.InformerSynced | ||||
|  | ||||
| 	// allocate/recycle CIDRs for node if allocateNodeCIDRs == true | ||||
| 	cidrAllocator CIDRAllocator | ||||
| 	// manages taints | ||||
|  | ||||
| 	taintManager *NoExecuteTaintManager | ||||
|  | ||||
| 	forcefullyDeletePod        func(*v1.Pod) error | ||||
| @@ -210,6 +211,7 @@ func NewNodeController( | ||||
| 	serviceCIDR *net.IPNet, | ||||
| 	nodeCIDRMaskSize int, | ||||
| 	allocateNodeCIDRs bool, | ||||
| 	allocatorType CIDRAllocatorType, | ||||
| 	runTaintManager bool, | ||||
| 	useTaintBasedEvictions bool) (*NodeController, error) { | ||||
| 	eventBroadcaster := record.NewBroadcaster() | ||||
| @@ -254,6 +256,7 @@ func NewNodeController( | ||||
| 		clusterCIDR:                     clusterCIDR, | ||||
| 		serviceCIDR:                     serviceCIDR, | ||||
| 		allocateNodeCIDRs:               allocateNodeCIDRs, | ||||
| 		allocatorType:                   allocatorType, | ||||
| 		forcefullyDeletePod:             func(p *v1.Pod) error { return forcefullyDeletePod(kubeClient, p) }, | ||||
| 		nodeExistsInCloudProvider:       func(nodeName types.NodeName) (bool, error) { return nodeExistsInCloudProvider(cloud, nodeName) }, | ||||
| 		evictionLimiterQPS:              evictionLimiterQPS, | ||||
| @@ -309,7 +312,6 @@ func NewNodeController( | ||||
| 	}) | ||||
| 	nc.podInformerSynced = podInformer.Informer().HasSynced | ||||
|  | ||||
| 	nodeEventHandlerFuncs := cache.ResourceEventHandlerFuncs{} | ||||
| 	if nc.allocateNodeCIDRs { | ||||
| 		var nodeList *v1.NodeList | ||||
| 		var err error | ||||
| @@ -328,147 +330,32 @@ func NewNodeController( | ||||
| 		}); pollErr != nil { | ||||
| 			return nil, fmt.Errorf("Failed to list all nodes in %v, cannot proceed without updating CIDR map", apiserverStartupGracePeriod) | ||||
| 		} | ||||
| 		nc.cidrAllocator, err = NewCIDRRangeAllocator(kubeClient, clusterCIDR, serviceCIDR, nodeCIDRMaskSize, nodeList) | ||||
|  | ||||
| 		switch nc.allocatorType { | ||||
| 		case RangeAllocatorType: | ||||
| 			nc.cidrAllocator, err = NewCIDRRangeAllocator( | ||||
| 				kubeClient, clusterCIDR, serviceCIDR, nodeCIDRMaskSize, nodeList) | ||||
| 		case CloudAllocatorType: | ||||
| 			nc.cidrAllocator, err = NewCloudCIDRAllocator(kubeClient, cloud) | ||||
| 		default: | ||||
| 			return nil, fmt.Errorf("Invalid CIDR allocator type: %v", nc.allocatorType) | ||||
| 		} | ||||
|  | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
|  | ||||
| 		nodeEventHandlerFuncs = cache.ResourceEventHandlerFuncs{ | ||||
| 			AddFunc: func(originalObj interface{}) { | ||||
| 				obj, err := api.Scheme.DeepCopy(originalObj) | ||||
| 				if err != nil { | ||||
| 					utilruntime.HandleError(err) | ||||
| 					return | ||||
| 				} | ||||
| 				node := obj.(*v1.Node) | ||||
|  | ||||
| 				if err := nc.cidrAllocator.AllocateOrOccupyCIDR(node); err != nil { | ||||
| 					utilruntime.HandleError(fmt.Errorf("Error allocating CIDR: %v", err)) | ||||
| 				} | ||||
| 				if nc.taintManager != nil { | ||||
| 					nc.taintManager.NodeUpdated(nil, node) | ||||
| 				} | ||||
| 			}, | ||||
| 			UpdateFunc: func(oldNode, newNode interface{}) { | ||||
| 				node := newNode.(*v1.Node) | ||||
| 				prevNode := oldNode.(*v1.Node) | ||||
| 				// If the PodCIDR is not empty we either: | ||||
| 				// - already processed a Node that already had a CIDR after NC restarted | ||||
| 				//   (cidr is marked as used), | ||||
| 				// - already processed a Node successfully and allocated a CIDR for it | ||||
| 				//   (cidr is marked as used), | ||||
| 				// - already processed a Node but saw a "timeout" response and the | ||||
| 				//   request eventually got through; in this case we haven't released | ||||
| 				//   the allocated CIDR (cidr is still marked as used). | ||||
| 				// There's a possible error here: | ||||
| 				// - NC sees a new Node and assigns a CIDR X to it, | ||||
| 				// - Update Node call fails with a timeout, | ||||
| 				// - Node is updated by some other component, NC sees an update and | ||||
| 				//   assigns CIDR Y to the Node, | ||||
| 				// - Both CIDR X and CIDR Y are marked as used in the local cache, | ||||
| 				//   even though Node sees only CIDR Y | ||||
| 				// The problem here is that in in-memory cache we see CIDR X as marked, | ||||
| 				// which prevents it from being assigned to any new node. The cluster | ||||
| 				// state is correct. | ||||
| 				// Restart of NC fixes the issue. | ||||
| 				if node.Spec.PodCIDR == "" { | ||||
| 					nodeCopy, err := api.Scheme.Copy(node) | ||||
| 					if err != nil { | ||||
| 						utilruntime.HandleError(err) | ||||
| 						return | ||||
| 					} | ||||
|  | ||||
| 					if err := nc.cidrAllocator.AllocateOrOccupyCIDR(nodeCopy.(*v1.Node)); err != nil { | ||||
| 						utilruntime.HandleError(fmt.Errorf("Error allocating CIDR: %v", err)) | ||||
| 					} | ||||
| 				} | ||||
| 				if nc.taintManager != nil { | ||||
| 					nc.taintManager.NodeUpdated(prevNode, node) | ||||
| 				} | ||||
| 			}, | ||||
| 			DeleteFunc: func(originalObj interface{}) { | ||||
| 				obj, err := api.Scheme.DeepCopy(originalObj) | ||||
| 				if err != nil { | ||||
| 					utilruntime.HandleError(err) | ||||
| 					return | ||||
| 				} | ||||
|  | ||||
| 				node, isNode := obj.(*v1.Node) | ||||
| 				// We can get DeletedFinalStateUnknown instead of *v1.Node here and we need to handle that correctly. #34692 | ||||
| 				if !isNode { | ||||
| 					deletedState, ok := obj.(cache.DeletedFinalStateUnknown) | ||||
| 					if !ok { | ||||
| 						glog.Errorf("Received unexpected object: %v", obj) | ||||
| 						return | ||||
| 					} | ||||
| 					node, ok = deletedState.Obj.(*v1.Node) | ||||
| 					if !ok { | ||||
| 						glog.Errorf("DeletedFinalStateUnknown contained non-Node object: %v", deletedState.Obj) | ||||
| 						return | ||||
| 					} | ||||
| 				} | ||||
| 				if nc.taintManager != nil { | ||||
| 					nc.taintManager.NodeUpdated(node, nil) | ||||
| 				} | ||||
| 				if err := nc.cidrAllocator.ReleaseCIDR(node); err != nil { | ||||
| 					glog.Errorf("Error releasing CIDR: %v", err) | ||||
| 				} | ||||
| 			}, | ||||
| 		} | ||||
| 	} else { | ||||
| 		nodeEventHandlerFuncs = cache.ResourceEventHandlerFuncs{ | ||||
| 			AddFunc: func(originalObj interface{}) { | ||||
| 				obj, err := api.Scheme.DeepCopy(originalObj) | ||||
| 				if err != nil { | ||||
| 					utilruntime.HandleError(err) | ||||
| 					return | ||||
| 				} | ||||
| 				node := obj.(*v1.Node) | ||||
| 				if nc.taintManager != nil { | ||||
| 					nc.taintManager.NodeUpdated(nil, node) | ||||
| 				} | ||||
| 			}, | ||||
| 			UpdateFunc: func(oldNode, newNode interface{}) { | ||||
| 				node := newNode.(*v1.Node) | ||||
| 				prevNode := oldNode.(*v1.Node) | ||||
| 				if nc.taintManager != nil { | ||||
| 					nc.taintManager.NodeUpdated(prevNode, node) | ||||
|  | ||||
| 				} | ||||
| 			}, | ||||
| 			DeleteFunc: func(originalObj interface{}) { | ||||
| 				obj, err := api.Scheme.DeepCopy(originalObj) | ||||
| 				if err != nil { | ||||
| 					utilruntime.HandleError(err) | ||||
| 					return | ||||
| 				} | ||||
|  | ||||
| 				node, isNode := obj.(*v1.Node) | ||||
| 				// We can get DeletedFinalStateUnknown instead of *v1.Node here and we need to handle that correctly. #34692 | ||||
| 				if !isNode { | ||||
| 					deletedState, ok := obj.(cache.DeletedFinalStateUnknown) | ||||
| 					if !ok { | ||||
| 						glog.Errorf("Received unexpected object: %v", obj) | ||||
| 						return | ||||
| 					} | ||||
| 					node, ok = deletedState.Obj.(*v1.Node) | ||||
| 					if !ok { | ||||
| 						glog.Errorf("DeletedFinalStateUnknown contained non-Node object: %v", deletedState.Obj) | ||||
| 						return | ||||
| 					} | ||||
| 				} | ||||
| 				if nc.taintManager != nil { | ||||
| 					nc.taintManager.NodeUpdated(node, nil) | ||||
| 				} | ||||
| 			}, | ||||
| 		} | ||||
| 		nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ | ||||
| 			AddFunc:    nc.onNodeAdd, | ||||
| 			UpdateFunc: nc.onNodeUpdate, | ||||
| 			DeleteFunc: nc.onNodeDelete, | ||||
| 		}) | ||||
| 	} | ||||
|  | ||||
| 	if nc.runTaintManager { | ||||
| 		nc.taintManager = NewNoExecuteTaintManager(kubeClient) | ||||
| 	} | ||||
|  | ||||
| 	nodeInformer.Informer().AddEventHandler(nodeEventHandlerFuncs) | ||||
| 	nc.nodeLister = nodeInformer.Lister() | ||||
| 	nc.nodeInformerSynced = nodeInformer.Informer().HasSynced | ||||
|  | ||||
| @@ -546,6 +433,90 @@ func (nc *NodeController) doTaintingPass() { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (nc *NodeController) onNodeAdd(originalObj interface{}) { | ||||
| 	obj, err := api.Scheme.DeepCopy(originalObj) | ||||
| 	if err != nil { | ||||
| 		utilruntime.HandleError(err) | ||||
| 		return | ||||
| 	} | ||||
| 	node := obj.(*v1.Node) | ||||
|  | ||||
| 	if err := nc.cidrAllocator.AllocateOrOccupyCIDR(node); err != nil { | ||||
| 		utilruntime.HandleError(fmt.Errorf("Error allocating CIDR: %v", err)) | ||||
| 	} | ||||
| 	if nc.taintManager != nil { | ||||
| 		nc.taintManager.NodeUpdated(nil, node) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (nc *NodeController) onNodeUpdate(oldNode, newNode interface{}) { | ||||
| 	node := newNode.(*v1.Node) | ||||
| 	prevNode := oldNode.(*v1.Node) | ||||
| 	// If the PodCIDR is not empty we either: | ||||
| 	// - already processed a Node that already had a CIDR after NC restarted | ||||
| 	//   (cidr is marked as used), | ||||
| 	// - already processed a Node successfully and allocated a CIDR for it | ||||
| 	//   (cidr is marked as used), | ||||
| 	// - already processed a Node but saw a "timeout" response and the | ||||
| 	//   request eventually got through; in this case we haven't released | ||||
| 	//   the allocated CIDR (cidr is still marked as used). | ||||
| 	// There's a possible error here: | ||||
| 	// - NC sees a new Node and assigns a CIDR X to it, | ||||
| 	// - Update Node call fails with a timeout, | ||||
| 	// - Node is updated by some other component, NC sees an update and | ||||
| 	//   assigns CIDR Y to the Node, | ||||
| 	// - Both CIDR X and CIDR Y are marked as used in the local cache, | ||||
| 	//   even though Node sees only CIDR Y | ||||
| 	// The problem here is that in in-memory cache we see CIDR X as marked, | ||||
| 	// which prevents it from being assigned to any new node. The cluster | ||||
| 	// state is correct. | ||||
| 	// Restart of NC fixes the issue. | ||||
| 	if node.Spec.PodCIDR == "" { | ||||
| 		nodeCopy, err := api.Scheme.Copy(node) | ||||
| 		if err != nil { | ||||
| 			utilruntime.HandleError(err) | ||||
| 			return | ||||
| 		} | ||||
|  | ||||
| 		if err := nc.cidrAllocator.AllocateOrOccupyCIDR(nodeCopy.(*v1.Node)); err != nil { | ||||
| 			utilruntime.HandleError(fmt.Errorf("Error allocating CIDR: %v", err)) | ||||
| 		} | ||||
| 	} | ||||
| 	if nc.taintManager != nil { | ||||
| 		nc.taintManager.NodeUpdated(prevNode, node) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (nc *NodeController) onNodeDelete(originalObj interface{}) { | ||||
| 	obj, err := api.Scheme.DeepCopy(originalObj) | ||||
| 	if err != nil { | ||||
| 		utilruntime.HandleError(err) | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	node, isNode := obj.(*v1.Node) | ||||
| 	// We can get DeletedFinalStateUnknown instead of *v1.Node here and | ||||
| 	// we need to handle that correctly. #34692 | ||||
| 	if !isNode { | ||||
| 		deletedState, ok := obj.(cache.DeletedFinalStateUnknown) | ||||
| 		if !ok { | ||||
| 			glog.Errorf("Received unexpected object: %v", obj) | ||||
| 			return | ||||
| 		} | ||||
| 		node, ok = deletedState.Obj.(*v1.Node) | ||||
| 		if !ok { | ||||
| 			glog.Errorf("DeletedFinalStateUnknown contained non-Node object: %v", deletedState.Obj) | ||||
| 			return | ||||
| 		} | ||||
| 	} | ||||
| 	if nc.taintManager != nil { | ||||
| 		nc.taintManager.NodeUpdated(node, nil) | ||||
| 	} | ||||
| 	if err := nc.cidrAllocator.ReleaseCIDR(node); err != nil { | ||||
| 		glog.Errorf("Error releasing CIDR: %v", err) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Run starts an asynchronous loop that monitors the status of cluster nodes. | ||||
| func (nc *NodeController) Run() { | ||||
| 	go func() { | ||||
|   | ||||
| @@ -101,6 +101,7 @@ func NewNodeControllerFromClient( | ||||
| 		serviceCIDR, | ||||
| 		nodeCIDRMaskSize, | ||||
| 		allocateNodeCIDRs, | ||||
| 		RangeAllocatorType, | ||||
| 		useTaints, | ||||
| 		useTaints, | ||||
| 	) | ||||
| @@ -549,9 +550,22 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { | ||||
| 	} | ||||
|  | ||||
| 	for _, item := range table { | ||||
| 		nodeController, _ := NewNodeControllerFromClient(nil, item.fakeNodeHandler, | ||||
| 			evictionTimeout, testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold, testNodeMonitorGracePeriod, | ||||
| 			testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false, false) | ||||
| 		nodeController, _ := NewNodeControllerFromClient( | ||||
| 			nil, | ||||
| 			item.fakeNodeHandler, | ||||
| 			evictionTimeout, | ||||
| 			testRateLimiterQPS, | ||||
| 			testRateLimiterQPS, | ||||
| 			testLargeClusterThreshold, | ||||
| 			testUnhealtyThreshold, | ||||
| 			testNodeMonitorGracePeriod, | ||||
| 			testNodeStartupGracePeriod, | ||||
| 			testNodeMonitorPeriod, | ||||
| 			nil, | ||||
| 			nil, | ||||
| 			0, | ||||
| 			false, | ||||
| 			false) | ||||
| 		nodeController.now = func() metav1.Time { return fakeNow } | ||||
| 		nodeController.recorder = testutil.NewFakeRecorder() | ||||
| 		for _, ds := range item.daemonSets { | ||||
|   | ||||
							
								
								
									
pkg/controller/node/range_allocator.go (new file, 262 lines added)
							| @@ -0,0 +1,262 @@ | ||||
| /* | ||||
| Copyright 2016 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package node | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"net" | ||||
| 	"sync" | ||||
|  | ||||
| 	apierrors "k8s.io/apimachinery/pkg/api/errors" | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/apimachinery/pkg/util/sets" | ||||
| 	"k8s.io/apimachinery/pkg/util/wait" | ||||
| 	v1core "k8s.io/client-go/kubernetes/typed/core/v1" | ||||
| 	clientv1 "k8s.io/client-go/pkg/api/v1" | ||||
| 	"k8s.io/client-go/tools/record" | ||||
| 	"k8s.io/kubernetes/pkg/api" | ||||
| 	"k8s.io/kubernetes/pkg/api/v1" | ||||
| 	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset" | ||||
|  | ||||
| 	"github.com/golang/glog" | ||||
| ) | ||||
|  | ||||
| // TODO: figure out the good setting for those constants. | ||||
| const ( | ||||
| 	// controls how many NodeSpec updates NC can process concurrently. | ||||
| 	cidrUpdateWorkers   = 10 | ||||
| 	cidrUpdateQueueSize = 5000 | ||||
| 	// podCIDRUpdateRetry controls the number of retries of writing Node.Spec.PodCIDR update. | ||||
| 	podCIDRUpdateRetry = 5 | ||||
| ) | ||||
|  | ||||
| type rangeAllocator struct { | ||||
| 	client      clientset.Interface | ||||
| 	cidrs       *cidrSet | ||||
| 	clusterCIDR *net.IPNet | ||||
| 	maxCIDRs    int | ||||
| 	// Channel that is used to pass updating Nodes with assigned CIDRs to the background | ||||
| 	// This increases a throughput of CIDR assignment by not blocking on long operations. | ||||
| 	nodeCIDRUpdateChannel chan nodeAndCIDR | ||||
| 	recorder              record.EventRecorder | ||||
| 	// Keep a set of nodes that are currently being processed to avoid races in CIDR allocation | ||||
| 	sync.Mutex | ||||
| 	nodesInProcessing sets.String | ||||
| } | ||||
|  | ||||
| // NewCIDRRangeAllocator returns a CIDRAllocator to allocate CIDR for node | ||||
| // Caller must ensure subNetMaskSize is not less than cluster CIDR mask size. | ||||
| // Caller must always pass in a list of existing nodes so the new allocator | ||||
| // can initialize its CIDR map. NodeList is only nil in testing. | ||||
| func NewCIDRRangeAllocator(client clientset.Interface, clusterCIDR *net.IPNet, serviceCIDR *net.IPNet, subNetMaskSize int, nodeList *v1.NodeList) (CIDRAllocator, error) { | ||||
| 	eventBroadcaster := record.NewBroadcaster() | ||||
| 	recorder := eventBroadcaster.NewRecorder(api.Scheme, clientv1.EventSource{Component: "cidrAllocator"}) | ||||
| 	eventBroadcaster.StartLogging(glog.Infof) | ||||
| 	if client != nil { | ||||
| 		glog.V(0).Infof("Sending events to api server.") | ||||
| 		eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(client.Core().RESTClient()).Events("")}) | ||||
| 	} else { | ||||
| 		glog.Fatalf("kubeClient is nil when starting NodeController") | ||||
| 	} | ||||
|  | ||||
| 	ra := &rangeAllocator{ | ||||
| 		client:                client, | ||||
| 		cidrs:                 newCIDRSet(clusterCIDR, subNetMaskSize), | ||||
| 		clusterCIDR:           clusterCIDR, | ||||
| 		nodeCIDRUpdateChannel: make(chan nodeAndCIDR, cidrUpdateQueueSize), | ||||
| 		recorder:              recorder, | ||||
| 		nodesInProcessing:     sets.NewString(), | ||||
| 	} | ||||
|  | ||||
| 	if serviceCIDR != nil { | ||||
| 		ra.filterOutServiceRange(serviceCIDR) | ||||
| 	} else { | ||||
| 		glog.V(0).Info("No Service CIDR provided. Skipping filtering out service addresses.") | ||||
| 	} | ||||
|  | ||||
| 	if nodeList != nil { | ||||
| 		for _, node := range nodeList.Items { | ||||
| 			if node.Spec.PodCIDR == "" { | ||||
| 				glog.Infof("Node %v has no CIDR, ignoring", node.Name) | ||||
| 				continue | ||||
| 			} else { | ||||
| 				glog.Infof("Node %v has CIDR %s, occupying it in CIDR map", | ||||
| 					node.Name, node.Spec.PodCIDR) | ||||
| 			} | ||||
| 			if err := ra.occupyCIDR(&node); err != nil { | ||||
| 				// This will happen if: | ||||
| 				// 1. We find garbage in the podCIDR field. Retrying is useless. | ||||
| 				// 2. CIDR out of range: This means a node CIDR has changed. | ||||
| 				// This error will keep crashing controller-manager. | ||||
| 				return nil, err | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	for i := 0; i < cidrUpdateWorkers; i++ { | ||||
| 		go func(stopChan <-chan struct{}) { | ||||
| 			for { | ||||
| 				select { | ||||
| 				case workItem, ok := <-ra.nodeCIDRUpdateChannel: | ||||
| 					if !ok { | ||||
| 						glog.Warning("NodeCIDRUpdateChannel read returned false.") | ||||
| 						return | ||||
| 					} | ||||
| 					ra.updateCIDRAllocation(workItem) | ||||
| 				case <-stopChan: | ||||
| 					return | ||||
| 				} | ||||
| 			} | ||||
| 		}(wait.NeverStop) | ||||
| 	} | ||||
|  | ||||
| 	return ra, nil | ||||
| } | ||||
|  | ||||
| func (r *rangeAllocator) insertNodeToProcessing(nodeName string) bool { | ||||
| 	r.Lock() | ||||
| 	defer r.Unlock() | ||||
| 	if r.nodesInProcessing.Has(nodeName) { | ||||
| 		return false | ||||
| 	} | ||||
| 	r.nodesInProcessing.Insert(nodeName) | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| func (r *rangeAllocator) removeNodeFromProcessing(nodeName string) { | ||||
| 	r.Lock() | ||||
| 	defer r.Unlock() | ||||
| 	r.nodesInProcessing.Delete(nodeName) | ||||
| } | ||||
|  | ||||
| func (r *rangeAllocator) occupyCIDR(node *v1.Node) error { | ||||
| 	defer r.removeNodeFromProcessing(node.Name) | ||||
| 	if node.Spec.PodCIDR == "" { | ||||
| 		return nil | ||||
| 	} | ||||
| 	_, podCIDR, err := net.ParseCIDR(node.Spec.PodCIDR) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("failed to parse node %s, CIDR %s", node.Name, node.Spec.PodCIDR) | ||||
| 	} | ||||
| 	if err := r.cidrs.occupy(podCIDR); err != nil { | ||||
| 		return fmt.Errorf("failed to mark cidr as occupied: %v", err) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // WARNING: If you're adding any return calls or defer any more work from this | ||||
| // function you have to handle correctly nodesInProcessing. | ||||
| func (r *rangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error { | ||||
| 	if node == nil { | ||||
| 		return nil | ||||
| 	} | ||||
| 	if !r.insertNodeToProcessing(node.Name) { | ||||
| 		glog.V(2).Infof("Node %v is already in a process of CIDR assignment.", node.Name) | ||||
| 		return nil | ||||
| 	} | ||||
| 	if node.Spec.PodCIDR != "" { | ||||
| 		return r.occupyCIDR(node) | ||||
| 	} | ||||
| 	podCIDR, err := r.cidrs.allocateNext() | ||||
| 	if err != nil { | ||||
| 		r.removeNodeFromProcessing(node.Name) | ||||
| 		recordNodeStatusChange(r.recorder, node, "CIDRNotAvailable") | ||||
| 		return fmt.Errorf("failed to allocate cidr: %v", err) | ||||
| 	} | ||||
|  | ||||
| 	glog.V(10).Infof("Putting node %s with CIDR %s into the work queue", node.Name, podCIDR) | ||||
| 	r.nodeCIDRUpdateChannel <- nodeAndCIDR{ | ||||
| 		nodeName: node.Name, | ||||
| 		cidr:     podCIDR, | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error { | ||||
| 	if node == nil || node.Spec.PodCIDR == "" { | ||||
| 		return nil | ||||
| 	} | ||||
| 	_, podCIDR, err := net.ParseCIDR(node.Spec.PodCIDR) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("Failed to parse CIDR %s on Node %v: %v", node.Spec.PodCIDR, node.Name, err) | ||||
| 	} | ||||
|  | ||||
| 	glog.V(4).Infof("release CIDR %s", node.Spec.PodCIDR) | ||||
| 	if err = r.cidrs.release(podCIDR); err != nil { | ||||
| 		return fmt.Errorf("Error when releasing CIDR %v: %v", node.Spec.PodCIDR, err) | ||||
| 	} | ||||
| 	return err | ||||
| } | ||||
|  | ||||
| // Marks all CIDRs with subNetMaskSize that belongs to serviceCIDR as used, | ||||
| // so that they won't be assignable. | ||||
| func (r *rangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) { | ||||
| 	// Checks if service CIDR has a nonempty intersection with cluster | ||||
| 	// CIDR. It is the case if either clusterCIDR contains serviceCIDR with | ||||
| 	// clusterCIDR's Mask applied (this means that clusterCIDR contains | ||||
| 	// serviceCIDR) or vice versa (which means that serviceCIDR contains | ||||
| 	// clusterCIDR). | ||||
| 	if !r.clusterCIDR.Contains(serviceCIDR.IP.Mask(r.clusterCIDR.Mask)) && !serviceCIDR.Contains(r.clusterCIDR.IP.Mask(serviceCIDR.Mask)) { | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	if err := r.cidrs.occupy(serviceCIDR); err != nil { | ||||
| 		glog.Errorf("Error filtering out service cidr %v: %v", serviceCIDR, err) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Assigns CIDR to Node and sends an update to the API server. | ||||
| func (r *rangeAllocator) updateCIDRAllocation(data nodeAndCIDR) error { | ||||
| 	var err error | ||||
| 	var node *v1.Node | ||||
| 	defer r.removeNodeFromProcessing(data.nodeName) | ||||
| 	for rep := 0; rep < podCIDRUpdateRetry; rep++ { | ||||
| 		// TODO: change it to using PATCH instead of full Node updates. | ||||
| 		node, err = r.client.Core().Nodes().Get(data.nodeName, metav1.GetOptions{}) | ||||
| 		if err != nil { | ||||
| 			glog.Errorf("Failed while getting node %v to retry updating Node.Spec.PodCIDR: %v", data.nodeName, err) | ||||
| 			continue | ||||
| 		} | ||||
| 		if node.Spec.PodCIDR != "" { | ||||
| 			glog.Errorf("Node %v already has allocated CIDR %v. Releasing assigned one if different.", node.Name, node.Spec.PodCIDR) | ||||
| 			if node.Spec.PodCIDR != data.cidr.String() { | ||||
| 				if err := r.cidrs.release(data.cidr); err != nil { | ||||
| 					glog.Errorf("Error when releasing CIDR %v", data.cidr.String()) | ||||
| 				} | ||||
| 			} | ||||
| 			return nil | ||||
| 		} | ||||
| 		node.Spec.PodCIDR = data.cidr.String() | ||||
| 		if _, err := r.client.Core().Nodes().Update(node); err != nil { | ||||
| 			glog.Errorf("Failed while updating Node.Spec.PodCIDR (%d retries left): %v", podCIDRUpdateRetry-rep-1, err) | ||||
| 		} else { | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	if err != nil { | ||||
| 		recordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed") | ||||
| 		// We accept the fact that we may leak CIDRs here. This is safer than releasing | ||||
| 		// them in case when we don't know if request went through. | ||||
| 		// NodeController restart will return all falsely allocated CIDRs to the pool. | ||||
| 		if !apierrors.IsServerTimeout(err) { | ||||
| 			glog.Errorf("CIDR assignment for node %v failed: %v. Releasing allocated CIDR", data.nodeName, err) | ||||
| 			if releaseErr := r.cidrs.release(data.cidr); releaseErr != nil { | ||||
| 				glog.Errorf("Error releasing allocated CIDR for node %v: %v", data.nodeName, releaseErr) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return err | ||||
| } | ||||
| @@ -168,7 +168,7 @@ func init() { | ||||
| 		ObjectMeta: metav1.ObjectMeta{Name: saRolePrefix + "node-controller"}, | ||||
| 		Rules: []rbac.PolicyRule{ | ||||
| 			rbac.NewRule("get", "list", "update", "delete", "patch").Groups(legacyGroup).Resources("nodes").RuleOrDie(), | ||||
| 			rbac.NewRule("update").Groups(legacyGroup).Resources("nodes/status").RuleOrDie(), | ||||
| 			rbac.NewRule("patch", "update").Groups(legacyGroup).Resources("nodes/status").RuleOrDie(), | ||||
| 			// used for pod eviction | ||||
| 			rbac.NewRule("update").Groups(legacyGroup).Resources("pods/status").RuleOrDie(), | ||||
| 			rbac.NewRule("list", "delete").Groups(legacyGroup).Resources("pods").RuleOrDie(), | ||||
|   | ||||
| @@ -561,6 +561,7 @@ items: | ||||
|     resources: | ||||
|     - nodes/status | ||||
|     verbs: | ||||
|     - patch | ||||
|     - update | ||||
|   - apiGroups: | ||||
|     - "" | ||||
|   | ||||
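As a note on the RBAC change above: the cloud allocator marks the node's network as available by setting a NodeNetworkUnavailable=False condition via nodeutil.SetNodeCondition, and that helper submits the condition as a PATCH against the nodes/status subresource, which is why the node-controller role gains the patch verb. The sketch below only illustrates, under that assumption, roughly what such a status patch body looks like; the condition values are taken from the diff, but the JSON shape is a simplification rather than the exact bytes the real helper produces.

```go
// Illustrative sketch: the kind of status patch body implied by the new
// "patch" permission on nodes/status. Not the real client-go code path.
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// condition mimics the shape of a v1.NodeCondition for illustration only.
type condition struct {
	Type               string    `json:"type"`
	Status             string    `json:"status"`
	Reason             string    `json:"reason"`
	Message            string    `json:"message"`
	LastTransitionTime time.Time `json:"lastTransitionTime"`
}

func main() {
	// The controller reports NetworkUnavailable=False once the pod CIDR is in
	// place; a merge patch against the node's status subresource carries it.
	patch := map[string]interface{}{
		"status": map[string]interface{}{
			"conditions": []condition{{
				Type:               "NetworkUnavailable",
				Status:             "False",
				Reason:             "RouteCreated",
				Message:            "NodeController create implicit route",
				LastTransitionTime: time.Now(),
			}},
		},
	}
	body, err := json.Marshal(patch)
	if err != nil {
		panic(err)
	}
	// In the real code a body of this sort is sent with the clientset's node
	// status patch call, hence the added "patch" verb on nodes/status.
	fmt.Println(string(body))
}
```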
Author: Bowei Du