	Adds support for PodCIDR allocation from the GCE cloud provider
If CIDRAllocatorType is set to `CloudAllocator`, CIDR allocation is instead done by the external cloud provider, and the node controller is only responsible for reflecting the allocation into the node spec.
- Splits the rangeAllocator off from the cidr_allocator.go file.
- Adds cloudCIDRAllocator, which is used when the cloud provider allocates the CIDR ranges externally (GCE support only).
- Updates the RBAC permissions for the node controller to include PATCH.
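As a rough orientation before the diff, here is a minimal, self-contained Go sketch of the shape this change introduces: an allocator-type string selects between two implementations of a common CIDRAllocator interface. The type, constant, and interface names mirror the ones added below; the stub implementations and the simplified signatures (plain node names instead of *v1.Node) are hypothetical.

package main

import (
	"fmt"
)

// CIDRAllocatorType names the pod CIDR allocator the node controller should use.
type CIDRAllocatorType string

const (
	RangeAllocatorType CIDRAllocatorType = "RangeAllocator" // in-tree range allocator
	CloudAllocatorType CIDRAllocatorType = "CloudAllocator" // cloud provider assigns the ranges
)

// CIDRAllocator is the interface both allocators satisfy.
type CIDRAllocator interface {
	AllocateOrOccupyCIDR(nodeName string) error
	ReleaseCIDR(nodeName string) error
}

// rangeStub and cloudStub are hypothetical stand-ins for the real
// range_allocator.go and cloud_cidr_allocator.go implementations.
type rangeStub struct{}

func (rangeStub) AllocateOrOccupyCIDR(string) error { return nil }
func (rangeStub) ReleaseCIDR(string) error          { return nil }

type cloudStub struct{}

func (cloudStub) AllocateOrOccupyCIDR(string) error { return nil }
func (cloudStub) ReleaseCIDR(string) error          { return nil }

// newCIDRAllocator mirrors the switch this commit adds to NewNodeController.
func newCIDRAllocator(t CIDRAllocatorType) (CIDRAllocator, error) {
	switch t {
	case RangeAllocatorType:
		return rangeStub{}, nil
	case CloudAllocatorType:
		return cloudStub{}, nil
	default:
		return nil, fmt.Errorf("invalid CIDR allocator type: %v", t)
	}
}

func main() {
	alloc, err := newCIDRAllocator(CloudAllocatorType)
	fmt.Printf("%T %v\n", alloc, err)
}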
		| @@ -477,6 +477,7 @@ func StartControllers(controllers map[string]InitFunc, s *options.CMServer, root | |||||||
| 			serviceCIDR, | 			serviceCIDR, | ||||||
| 			int(s.NodeCIDRMaskSize), | 			int(s.NodeCIDRMaskSize), | ||||||
| 			s.AllocateNodeCIDRs, | 			s.AllocateNodeCIDRs, | ||||||
|  | 			nodecontroller.CIDRAllocatorType(s.CIDRAllocatorType), | ||||||
| 			s.EnableTaintManager, | 			s.EnableTaintManager, | ||||||
| 			utilfeature.DefaultFeatureGate.Enabled(features.TaintBasedEvictions), | 			utilfeature.DefaultFeatureGate.Enabled(features.TaintBasedEvictions), | ||||||
| 		) | 		) | ||||||
|   | |||||||
| @@ -188,7 +188,10 @@ func (s *CMServer) AddFlags(fs *pflag.FlagSet, allControllers []string, disabled | |||||||
| 	fs.StringVar(&s.ClusterCIDR, "cluster-cidr", s.ClusterCIDR, "CIDR Range for Pods in cluster.") | 	fs.StringVar(&s.ClusterCIDR, "cluster-cidr", s.ClusterCIDR, "CIDR Range for Pods in cluster.") | ||||||
| 	fs.StringVar(&s.ServiceCIDR, "service-cluster-ip-range", s.ServiceCIDR, "CIDR Range for Services in cluster.") | 	fs.StringVar(&s.ServiceCIDR, "service-cluster-ip-range", s.ServiceCIDR, "CIDR Range for Services in cluster.") | ||||||
| 	fs.Int32Var(&s.NodeCIDRMaskSize, "node-cidr-mask-size", s.NodeCIDRMaskSize, "Mask size for node cidr in cluster.") | 	fs.Int32Var(&s.NodeCIDRMaskSize, "node-cidr-mask-size", s.NodeCIDRMaskSize, "Mask size for node cidr in cluster.") | ||||||
| 	fs.BoolVar(&s.AllocateNodeCIDRs, "allocate-node-cidrs", false, "Should CIDRs for Pods be allocated and set on the cloud provider.") | 	fs.BoolVar(&s.AllocateNodeCIDRs, "allocate-node-cidrs", false, | ||||||
|  | 		"Should CIDRs for Pods be allocated and set on the cloud provider.") | ||||||
|  | 	fs.StringVar(&s.CIDRAllocatorType, "cidr-allocator-type", "RangeAllocator", | ||||||
|  | 		"Type of CIDR allocator to use") | ||||||
| 	fs.BoolVar(&s.ConfigureCloudRoutes, "configure-cloud-routes", true, "Should CIDRs allocated by allocate-node-cidrs be configured on the cloud provider.") | 	fs.BoolVar(&s.ConfigureCloudRoutes, "configure-cloud-routes", true, "Should CIDRs allocated by allocate-node-cidrs be configured on the cloud provider.") | ||||||
| 	fs.StringVar(&s.Master, "master", s.Master, "The address of the Kubernetes API server (overrides any value in kubeconfig)") | 	fs.StringVar(&s.Master, "master", s.Master, "The address of the Kubernetes API server (overrides any value in kubeconfig)") | ||||||
| 	fs.StringVar(&s.Kubeconfig, "kubeconfig", s.Kubeconfig, "Path to kubeconfig file with authorization and master location information.") | 	fs.StringVar(&s.Kubeconfig, "kubeconfig", s.Kubeconfig, "Path to kubeconfig file with authorization and master location information.") | ||||||
|   | |||||||
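As a standalone illustration of the new flag (assuming the github.com/spf13/pflag package that kube-controller-manager uses for its flag sets; the flag name and default come from the hunk above, everything else is hypothetical):

package main

import (
	"fmt"

	"github.com/spf13/pflag"
)

func main() {
	fs := pflag.NewFlagSet("example", pflag.ContinueOnError)
	var cidrAllocatorType string
	fs.StringVar(&cidrAllocatorType, "cidr-allocator-type", "RangeAllocator",
		"Type of CIDR allocator to use")

	// Simulate passing the flag on the command line.
	if err := fs.Parse([]string{"--cidr-allocator-type=CloudAllocator"}); err != nil {
		fmt.Println("parse error:", err)
		return
	}

	// StartControllers later converts the raw string into the typed value,
	// i.e. nodecontroller.CIDRAllocatorType(s.CIDRAllocatorType).
	fmt.Println("allocator type:", cidrAllocatorType)
}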
| @@ -794,9 +794,11 @@ type KubeControllerManagerConfiguration struct { | |||||||
| 	ServiceCIDR string | 	ServiceCIDR string | ||||||
| 	// NodeCIDRMaskSize is the mask size for node cidr in cluster. | 	// NodeCIDRMaskSize is the mask size for node cidr in cluster. | ||||||
| 	NodeCIDRMaskSize int32 | 	NodeCIDRMaskSize int32 | ||||||
| 	// allocateNodeCIDRs enables CIDRs for Pods to be allocated and, if | 	// AllocateNodeCIDRs enables CIDRs for Pods to be allocated and, if | ||||||
| 	// ConfigureCloudRoutes is true, to be set on the cloud provider. | 	// ConfigureCloudRoutes is true, to be set on the cloud provider. | ||||||
| 	AllocateNodeCIDRs bool | 	AllocateNodeCIDRs bool | ||||||
|  | 	// CIDRAllocatorType determines what kind of pod CIDR allocator will be used. | ||||||
|  | 	CIDRAllocatorType string | ||||||
| 	// configureCloudRoutes enables CIDRs allocated with allocateNodeCIDRs | 	// configureCloudRoutes enables CIDRs allocated with allocateNodeCIDRs | ||||||
| 	// to be configured on the cloud provider. | 	// to be configured on the cloud provider. | ||||||
| 	ConfigureCloudRoutes bool | 	ConfigureCloudRoutes bool | ||||||
|   | |||||||
| @@ -217,10 +217,10 @@ func (gce *GCECloud) CurrentNodeName(hostname string) (types.NodeName, error) { | |||||||
| 	return types.NodeName(hostname), nil | 	return types.NodeName(hostname), nil | ||||||
| } | } | ||||||
|  |  | ||||||
| // PodCIDRs returns a list of CIDR ranges that are assigned to the | // AliasRanges returns a list of CIDR ranges that are assigned to the | ||||||
| // `node` for allocation to pods. Returns a list of the form | // `node` for allocation to pods. Returns a list of the form | ||||||
| // "<ip>/<netmask>". | // "<ip>/<netmask>". | ||||||
| func (gce *GCECloud) PodCIDRs(nodeName types.NodeName) (cidrs []string, err error) { | func (gce *GCECloud) AliasRanges(nodeName types.NodeName) (cidrs []string, err error) { | ||||||
| 	var instance *gceInstance | 	var instance *gceInstance | ||||||
| 	instance, err = gce.getInstanceByName(mapNodeNameToInstanceName(nodeName)) | 	instance, err = gce.getInstanceByName(mapNodeNameToInstanceName(nodeName)) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
|   | |||||||
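A hedged usage sketch for the renamed method: a caller holding a *gce.GCECloud can ask for a node's alias ranges and treat the first entry as the pod CIDR, which is exactly what the new cloud allocator does further down. The small interface and the fake cloud here are hypothetical so the sketch runs without GCE credentials.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/types"
)

// aliasRangeLister captures the single method this sketch needs;
// *gce.GCECloud satisfies it after this change.
type aliasRangeLister interface {
	AliasRanges(nodeName types.NodeName) ([]string, error)
}

// podCIDRFor returns the first alias range reported for the node, mirroring
// how cloud_cidr_allocator.go consumes AliasRanges.
func podCIDRFor(cloud aliasRangeLister, node string) (string, error) {
	cidrs, err := cloud.AliasRanges(types.NodeName(node))
	if err != nil {
		return "", err
	}
	if len(cidrs) == 0 {
		return "", fmt.Errorf("no alias ranges assigned to node %s", node)
	}
	return cidrs[0], nil // "<ip>/<netmask>"
}

// fakeCloud is a hypothetical stand-in used only to exercise the sketch.
type fakeCloud struct{}

func (fakeCloud) AliasRanges(types.NodeName) ([]string, error) {
	return []string{"10.128.4.0/24"}, nil
}

func main() {
	cidr, err := podCIDRFor(fakeCloud{}, "node-1")
	fmt.Println(cidr, err)
}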
| @@ -8,56 +8,6 @@ load( | |||||||
|     "go_test", |     "go_test", | ||||||
| ) | ) | ||||||
|  |  | ||||||
| go_library( |  | ||||||
|     name = "go_default_library", |  | ||||||
|     srcs = [ |  | ||||||
|         "cidr_allocator.go", |  | ||||||
|         "cidr_set.go", |  | ||||||
|         "controller_utils.go", |  | ||||||
|         "doc.go", |  | ||||||
|         "metrics.go", |  | ||||||
|         "nodecontroller.go", |  | ||||||
|         "rate_limited_queue.go", |  | ||||||
|         "taint_controller.go", |  | ||||||
|         "timed_workers.go", |  | ||||||
|     ], |  | ||||||
|     tags = ["automanaged"], |  | ||||||
|     deps = [ |  | ||||||
|         "//pkg/api:go_default_library", |  | ||||||
|         "//pkg/api/v1:go_default_library", |  | ||||||
|         "//pkg/client/clientset_generated/clientset:go_default_library", |  | ||||||
|         "//pkg/client/informers/informers_generated/externalversions/core/v1:go_default_library", |  | ||||||
|         "//pkg/client/informers/informers_generated/externalversions/extensions/v1beta1:go_default_library", |  | ||||||
|         "//pkg/client/listers/core/v1:go_default_library", |  | ||||||
|         "//pkg/client/listers/extensions/v1beta1:go_default_library", |  | ||||||
|         "//pkg/cloudprovider:go_default_library", |  | ||||||
|         "//pkg/controller:go_default_library", |  | ||||||
|         "//pkg/kubelet/util/format:go_default_library", |  | ||||||
|         "//pkg/util/metrics:go_default_library", |  | ||||||
|         "//pkg/util/node:go_default_library", |  | ||||||
|         "//pkg/util/system:go_default_library", |  | ||||||
|         "//pkg/util/version:go_default_library", |  | ||||||
|         "//vendor:github.com/golang/glog", |  | ||||||
|         "//vendor:github.com/prometheus/client_golang/prometheus", |  | ||||||
|         "//vendor:k8s.io/apimachinery/pkg/api/equality", |  | ||||||
|         "//vendor:k8s.io/apimachinery/pkg/api/errors", |  | ||||||
|         "//vendor:k8s.io/apimachinery/pkg/apis/meta/v1", |  | ||||||
|         "//vendor:k8s.io/apimachinery/pkg/fields", |  | ||||||
|         "//vendor:k8s.io/apimachinery/pkg/labels", |  | ||||||
|         "//vendor:k8s.io/apimachinery/pkg/types", |  | ||||||
|         "//vendor:k8s.io/apimachinery/pkg/util/errors", |  | ||||||
|         "//vendor:k8s.io/apimachinery/pkg/util/runtime", |  | ||||||
|         "//vendor:k8s.io/apimachinery/pkg/util/sets", |  | ||||||
|         "//vendor:k8s.io/apimachinery/pkg/util/wait", |  | ||||||
|         "//vendor:k8s.io/client-go/kubernetes/typed/core/v1", |  | ||||||
|         "//vendor:k8s.io/client-go/pkg/api/v1", |  | ||||||
|         "//vendor:k8s.io/client-go/tools/cache", |  | ||||||
|         "//vendor:k8s.io/client-go/tools/record", |  | ||||||
|         "//vendor:k8s.io/client-go/util/flowcontrol", |  | ||||||
|         "//vendor:k8s.io/client-go/util/workqueue", |  | ||||||
|     ], |  | ||||||
| ) |  | ||||||
|  |  | ||||||
| go_test( | go_test( | ||||||
|     name = "go_default_test", |     name = "go_default_test", | ||||||
|     srcs = [ |     srcs = [ | ||||||
| @@ -96,6 +46,59 @@ go_test( | |||||||
|     ], |     ], | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | go_library( | ||||||
|  |     name = "go_default_library", | ||||||
|  |     srcs = [ | ||||||
|  |         "cidr_allocator.go", | ||||||
|  |         "cidr_set.go", | ||||||
|  |         "cloud_cidr_allocator.go", | ||||||
|  |         "controller_utils.go", | ||||||
|  |         "doc.go", | ||||||
|  |         "metrics.go", | ||||||
|  |         "nodecontroller.go", | ||||||
|  |         "range_allocator.go", | ||||||
|  |         "rate_limited_queue.go", | ||||||
|  |         "taint_controller.go", | ||||||
|  |         "timed_workers.go", | ||||||
|  |     ], | ||||||
|  |     tags = ["automanaged"], | ||||||
|  |     deps = [ | ||||||
|  |         "//pkg/api:go_default_library", | ||||||
|  |         "//pkg/api/v1:go_default_library", | ||||||
|  |         "//pkg/client/clientset_generated/clientset:go_default_library", | ||||||
|  |         "//pkg/client/informers/informers_generated/externalversions/core/v1:go_default_library", | ||||||
|  |         "//pkg/client/informers/informers_generated/externalversions/extensions/v1beta1:go_default_library", | ||||||
|  |         "//pkg/client/listers/core/v1:go_default_library", | ||||||
|  |         "//pkg/client/listers/extensions/v1beta1:go_default_library", | ||||||
|  |         "//pkg/cloudprovider:go_default_library", | ||||||
|  |         "//pkg/cloudprovider/providers/gce:go_default_library", | ||||||
|  |         "//pkg/controller:go_default_library", | ||||||
|  |         "//pkg/kubelet/util/format:go_default_library", | ||||||
|  |         "//pkg/util/metrics:go_default_library", | ||||||
|  |         "//pkg/util/node:go_default_library", | ||||||
|  |         "//pkg/util/system:go_default_library", | ||||||
|  |         "//pkg/util/version:go_default_library", | ||||||
|  |         "//vendor:github.com/golang/glog", | ||||||
|  |         "//vendor:github.com/prometheus/client_golang/prometheus", | ||||||
|  |         "//vendor:k8s.io/apimachinery/pkg/api/equality", | ||||||
|  |         "//vendor:k8s.io/apimachinery/pkg/api/errors", | ||||||
|  |         "//vendor:k8s.io/apimachinery/pkg/apis/meta/v1", | ||||||
|  |         "//vendor:k8s.io/apimachinery/pkg/fields", | ||||||
|  |         "//vendor:k8s.io/apimachinery/pkg/labels", | ||||||
|  |         "//vendor:k8s.io/apimachinery/pkg/types", | ||||||
|  |         "//vendor:k8s.io/apimachinery/pkg/util/errors", | ||||||
|  |         "//vendor:k8s.io/apimachinery/pkg/util/runtime", | ||||||
|  |         "//vendor:k8s.io/apimachinery/pkg/util/sets", | ||||||
|  |         "//vendor:k8s.io/apimachinery/pkg/util/wait", | ||||||
|  |         "//vendor:k8s.io/client-go/kubernetes/typed/core/v1", | ||||||
|  |         "//vendor:k8s.io/client-go/pkg/api/v1", | ||||||
|  |         "//vendor:k8s.io/client-go/tools/cache", | ||||||
|  |         "//vendor:k8s.io/client-go/tools/record", | ||||||
|  |         "//vendor:k8s.io/client-go/util/flowcontrol", | ||||||
|  |         "//vendor:k8s.io/client-go/util/workqueue", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  |  | ||||||
| filegroup( | filegroup( | ||||||
|     name = "package-srcs", |     name = "package-srcs", | ||||||
|     srcs = glob(["**"]), |     srcs = glob(["**"]), | ||||||
|   | |||||||
| @@ -18,259 +18,34 @@ package node | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"errors" | 	"errors" | ||||||
| 	"fmt" |  | ||||||
| 	"net" | 	"net" | ||||||
| 	"sync" |  | ||||||
|  |  | ||||||
| 	apierrors "k8s.io/apimachinery/pkg/api/errors" | 	v1 "k8s.io/kubernetes/pkg/api/v1" | ||||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" |  | ||||||
| 	"k8s.io/apimachinery/pkg/util/sets" |  | ||||||
| 	"k8s.io/apimachinery/pkg/util/wait" |  | ||||||
| 	v1core "k8s.io/client-go/kubernetes/typed/core/v1" |  | ||||||
| 	clientv1 "k8s.io/client-go/pkg/api/v1" |  | ||||||
| 	"k8s.io/client-go/tools/record" |  | ||||||
| 	"k8s.io/kubernetes/pkg/api" |  | ||||||
| 	"k8s.io/kubernetes/pkg/api/v1" |  | ||||||
| 	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset" |  | ||||||
|  |  | ||||||
| 	"github.com/golang/glog" |  | ||||||
| ) | ) | ||||||
|  |  | ||||||
| // TODO: figure out the good setting for those constants. | var errCIDRRangeNoCIDRsRemaining = errors.New( | ||||||
| const ( | 	"CIDR allocation failed; there are no remaining CIDRs left to allocate in the accepted range") | ||||||
| 	// controls how many NodeSpec updates NC can process concurrently. |  | ||||||
| 	cidrUpdateWorkers   = 10 |  | ||||||
| 	cidrUpdateQueueSize = 5000 |  | ||||||
| 	// podCIDRUpdateRetry controls the number of retries of writing Node.Spec.PodCIDR update. |  | ||||||
| 	podCIDRUpdateRetry = 5 |  | ||||||
| ) |  | ||||||
|  |  | ||||||
| var errCIDRRangeNoCIDRsRemaining = errors.New("CIDR allocation failed; there are no remaining CIDRs left to allocate in the accepted range") |  | ||||||
|  |  | ||||||
| type nodeAndCIDR struct { | type nodeAndCIDR struct { | ||||||
| 	cidr     *net.IPNet | 	cidr     *net.IPNet | ||||||
| 	nodeName string | 	nodeName string | ||||||
| } | } | ||||||
|  |  | ||||||
| // CIDRAllocator is an interface implemented by things that know how to allocate/occupy/recycle CIDR for nodes. | // CIDRAllocatorType is the type of the allocator to use. | ||||||
|  | type CIDRAllocatorType string | ||||||
|  |  | ||||||
|  | const ( | ||||||
|  | 	RangeAllocatorType CIDRAllocatorType = "RangeAllocator" | ||||||
|  | 	CloudAllocatorType CIDRAllocatorType = "CloudAllocator" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | // CIDRAllocator is an interface implemented by things that know how to | ||||||
|  | // allocate/occupy/recycle CIDR for nodes. | ||||||
| type CIDRAllocator interface { | type CIDRAllocator interface { | ||||||
|  | 	// AllocateOrOccupyCIDR looks at the given node, assigns it a valid | ||||||
|  | 	// CIDR if it doesn't currently have one or marks the CIDR as used if | ||||||
|  | 	// the node already has one. | ||||||
| 	AllocateOrOccupyCIDR(node *v1.Node) error | 	AllocateOrOccupyCIDR(node *v1.Node) error | ||||||
|  | 	// ReleaseCIDR releases the CIDR of the removed node | ||||||
| 	ReleaseCIDR(node *v1.Node) error | 	ReleaseCIDR(node *v1.Node) error | ||||||
| } | } | ||||||
|  |  | ||||||
| type rangeAllocator struct { |  | ||||||
| 	client      clientset.Interface |  | ||||||
| 	cidrs       *cidrSet |  | ||||||
| 	clusterCIDR *net.IPNet |  | ||||||
| 	maxCIDRs    int |  | ||||||
| 	// Channel that is used to pass updating Nodes with assigned CIDRs to the background |  | ||||||
| 	// This increases a throughput of CIDR assignment by not blocking on long operations. |  | ||||||
| 	nodeCIDRUpdateChannel chan nodeAndCIDR |  | ||||||
| 	recorder              record.EventRecorder |  | ||||||
| 	// Keep a set of nodes that are currectly being processed to avoid races in CIDR allocation |  | ||||||
| 	sync.Mutex |  | ||||||
| 	nodesInProcessing sets.String |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // NewCIDRRangeAllocator returns a CIDRAllocator to allocate CIDR for node |  | ||||||
| // Caller must ensure subNetMaskSize is not less than cluster CIDR mask size. |  | ||||||
| // Caller must always pass in a list of existing nodes so the new allocator |  | ||||||
| // can initialize its CIDR map. NodeList is only nil in testing. |  | ||||||
| func NewCIDRRangeAllocator(client clientset.Interface, clusterCIDR *net.IPNet, serviceCIDR *net.IPNet, subNetMaskSize int, nodeList *v1.NodeList) (CIDRAllocator, error) { |  | ||||||
| 	eventBroadcaster := record.NewBroadcaster() |  | ||||||
| 	recorder := eventBroadcaster.NewRecorder(api.Scheme, clientv1.EventSource{Component: "cidrAllocator"}) |  | ||||||
| 	eventBroadcaster.StartLogging(glog.Infof) |  | ||||||
| 	if client != nil { |  | ||||||
| 		glog.V(0).Infof("Sending events to api server.") |  | ||||||
| 		eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(client.Core().RESTClient()).Events("")}) |  | ||||||
| 	} else { |  | ||||||
| 		glog.Fatalf("kubeClient is nil when starting NodeController") |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	ra := &rangeAllocator{ |  | ||||||
| 		client:                client, |  | ||||||
| 		cidrs:                 newCIDRSet(clusterCIDR, subNetMaskSize), |  | ||||||
| 		clusterCIDR:           clusterCIDR, |  | ||||||
| 		nodeCIDRUpdateChannel: make(chan nodeAndCIDR, cidrUpdateQueueSize), |  | ||||||
| 		recorder:              recorder, |  | ||||||
| 		nodesInProcessing:     sets.NewString(), |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	if serviceCIDR != nil { |  | ||||||
| 		ra.filterOutServiceRange(serviceCIDR) |  | ||||||
| 	} else { |  | ||||||
| 		glog.V(0).Info("No Service CIDR provided. Skipping filtering out service addresses.") |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	if nodeList != nil { |  | ||||||
| 		for _, node := range nodeList.Items { |  | ||||||
| 			if node.Spec.PodCIDR == "" { |  | ||||||
| 				glog.Infof("Node %v has no CIDR, ignoring", node.Name) |  | ||||||
| 				continue |  | ||||||
| 			} else { |  | ||||||
| 				glog.Infof("Node %v has CIDR %s, occupying it in CIDR map", node.Name, node.Spec.PodCIDR) |  | ||||||
| 			} |  | ||||||
| 			if err := ra.occupyCIDR(&node); err != nil { |  | ||||||
| 				// This will happen if: |  | ||||||
| 				// 1. We find garbage in the podCIDR field. Retrying is useless. |  | ||||||
| 				// 2. CIDR out of range: This means a node CIDR has changed. |  | ||||||
| 				// This error will keep crashing controller-manager. |  | ||||||
| 				return nil, err |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	for i := 0; i < cidrUpdateWorkers; i++ { |  | ||||||
| 		go func(stopChan <-chan struct{}) { |  | ||||||
| 			for { |  | ||||||
| 				select { |  | ||||||
| 				case workItem, ok := <-ra.nodeCIDRUpdateChannel: |  | ||||||
| 					if !ok { |  | ||||||
| 						glog.Warning("NodeCIDRUpdateChannel read returned false.") |  | ||||||
| 						return |  | ||||||
| 					} |  | ||||||
| 					ra.updateCIDRAllocation(workItem) |  | ||||||
| 				case <-stopChan: |  | ||||||
| 					return |  | ||||||
| 				} |  | ||||||
| 			} |  | ||||||
| 		}(wait.NeverStop) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return ra, nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (r *rangeAllocator) insertNodeToProcessing(nodeName string) bool { |  | ||||||
| 	r.Lock() |  | ||||||
| 	defer r.Unlock() |  | ||||||
| 	if r.nodesInProcessing.Has(nodeName) { |  | ||||||
| 		return false |  | ||||||
| 	} |  | ||||||
| 	r.nodesInProcessing.Insert(nodeName) |  | ||||||
| 	return true |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (r *rangeAllocator) removeNodeFromProcessing(nodeName string) { |  | ||||||
| 	r.Lock() |  | ||||||
| 	defer r.Unlock() |  | ||||||
| 	r.nodesInProcessing.Delete(nodeName) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (r *rangeAllocator) occupyCIDR(node *v1.Node) error { |  | ||||||
| 	defer r.removeNodeFromProcessing(node.Name) |  | ||||||
| 	if node.Spec.PodCIDR == "" { |  | ||||||
| 		return nil |  | ||||||
| 	} |  | ||||||
| 	_, podCIDR, err := net.ParseCIDR(node.Spec.PodCIDR) |  | ||||||
| 	if err != nil { |  | ||||||
| 		return fmt.Errorf("failed to parse node %s, CIDR %s", node.Name, node.Spec.PodCIDR) |  | ||||||
| 	} |  | ||||||
| 	if err := r.cidrs.occupy(podCIDR); err != nil { |  | ||||||
| 		return fmt.Errorf("failed to mark cidr as occupied: %v", err) |  | ||||||
| 	} |  | ||||||
| 	return nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // AllocateOrOccupyCIDR looks at the given node, assigns it a valid CIDR |  | ||||||
| // if it doesn't currently have one or mark the CIDR as used if the node already have one. |  | ||||||
| // WARNING: If you're adding any return calls or defer any more work from this function |  | ||||||
| // you have to handle correctly nodesInProcessing. |  | ||||||
| func (r *rangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error { |  | ||||||
| 	if node == nil { |  | ||||||
| 		return nil |  | ||||||
| 	} |  | ||||||
| 	if !r.insertNodeToProcessing(node.Name) { |  | ||||||
| 		glog.V(2).Infof("Node %v is already in a process of CIDR assignment.", node.Name) |  | ||||||
| 		return nil |  | ||||||
| 	} |  | ||||||
| 	if node.Spec.PodCIDR != "" { |  | ||||||
| 		return r.occupyCIDR(node) |  | ||||||
| 	} |  | ||||||
| 	podCIDR, err := r.cidrs.allocateNext() |  | ||||||
| 	if err != nil { |  | ||||||
| 		r.removeNodeFromProcessing(node.Name) |  | ||||||
| 		recordNodeStatusChange(r.recorder, node, "CIDRNotAvailable") |  | ||||||
| 		return fmt.Errorf("failed to allocate cidr: %v", err) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	glog.V(10).Infof("Putting node %s with CIDR %s into the work queue", node.Name, podCIDR) |  | ||||||
| 	r.nodeCIDRUpdateChannel <- nodeAndCIDR{ |  | ||||||
| 		nodeName: node.Name, |  | ||||||
| 		cidr:     podCIDR, |  | ||||||
| 	} |  | ||||||
| 	return nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // ReleaseCIDR releases the CIDR of the removed node |  | ||||||
| func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error { |  | ||||||
| 	if node == nil || node.Spec.PodCIDR == "" { |  | ||||||
| 		return nil |  | ||||||
| 	} |  | ||||||
| 	_, podCIDR, err := net.ParseCIDR(node.Spec.PodCIDR) |  | ||||||
| 	if err != nil { |  | ||||||
| 		return fmt.Errorf("Failed to parse CIDR %s on Node %v: %v", node.Spec.PodCIDR, node.Name, err) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	glog.V(4).Infof("release CIDR %s", node.Spec.PodCIDR) |  | ||||||
| 	if err = r.cidrs.release(podCIDR); err != nil { |  | ||||||
| 		return fmt.Errorf("Error when releasing CIDR %v: %v", node.Spec.PodCIDR, err) |  | ||||||
| 	} |  | ||||||
| 	return err |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Marks all CIDRs with subNetMaskSize that belongs to serviceCIDR as used, |  | ||||||
| // so that they won't be assignable. |  | ||||||
| func (r *rangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) { |  | ||||||
| 	// Checks if service CIDR has a nonempty intersection with cluster CIDR. It is the case if either |  | ||||||
| 	// clusterCIDR contains serviceCIDR with clusterCIDR's Mask applied (this means that clusterCIDR contains serviceCIDR) |  | ||||||
| 	// or vice versa (which means that serviceCIDR contains clusterCIDR). |  | ||||||
| 	if !r.clusterCIDR.Contains(serviceCIDR.IP.Mask(r.clusterCIDR.Mask)) && !serviceCIDR.Contains(r.clusterCIDR.IP.Mask(serviceCIDR.Mask)) { |  | ||||||
| 		return |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	if err := r.cidrs.occupy(serviceCIDR); err != nil { |  | ||||||
| 		glog.Errorf("Error filtering out service cidr %v: %v", serviceCIDR, err) |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Assigns CIDR to Node and sends an update to the API server. |  | ||||||
| func (r *rangeAllocator) updateCIDRAllocation(data nodeAndCIDR) error { |  | ||||||
| 	var err error |  | ||||||
| 	var node *v1.Node |  | ||||||
| 	defer r.removeNodeFromProcessing(data.nodeName) |  | ||||||
| 	for rep := 0; rep < podCIDRUpdateRetry; rep++ { |  | ||||||
| 		// TODO: change it to using PATCH instead of full Node updates. |  | ||||||
| 		node, err = r.client.Core().Nodes().Get(data.nodeName, metav1.GetOptions{}) |  | ||||||
| 		if err != nil { |  | ||||||
| 			glog.Errorf("Failed while getting node %v to retry updating Node.Spec.PodCIDR: %v", data.nodeName, err) |  | ||||||
| 			continue |  | ||||||
| 		} |  | ||||||
| 		if node.Spec.PodCIDR != "" { |  | ||||||
| 			glog.Errorf("Node %v already has allocated CIDR %v. Releasing assigned one if different.", node.Name, node.Spec.PodCIDR) |  | ||||||
| 			if node.Spec.PodCIDR != data.cidr.String() { |  | ||||||
| 				if err := r.cidrs.release(data.cidr); err != nil { |  | ||||||
| 					glog.Errorf("Error when releasing CIDR %v", data.cidr.String()) |  | ||||||
| 				} |  | ||||||
| 			} |  | ||||||
| 			return nil |  | ||||||
| 		} |  | ||||||
| 		node.Spec.PodCIDR = data.cidr.String() |  | ||||||
| 		if _, err := r.client.Core().Nodes().Update(node); err != nil { |  | ||||||
| 			glog.Errorf("Failed while updating Node.Spec.PodCIDR (%d retries left): %v", podCIDRUpdateRetry-rep-1, err) |  | ||||||
| 		} else { |  | ||||||
| 			break |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	if err != nil { |  | ||||||
| 		recordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed") |  | ||||||
| 		// We accept the fact that we may leek CIDRs here. This is safer than releasing |  | ||||||
| 		// them in case when we don't know if request went through. |  | ||||||
| 		// NodeController restart will return all falsely allocated CIDRs to the pool. |  | ||||||
| 		if !apierrors.IsServerTimeout(err) { |  | ||||||
| 			glog.Errorf("CIDR assignment for node %v failed: %v. Releasing allocated CIDR", data.nodeName, err) |  | ||||||
| 			if releaseErr := r.cidrs.release(data.cidr); releaseErr != nil { |  | ||||||
| 				glog.Errorf("Error releasing allocated CIDR for node %v: %v", data.nodeName, releaseErr) |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return err |  | ||||||
| } |  | ||||||
|   | |||||||
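With the range allocator moved out, cidr_allocator.go is reduced to the CIDRAllocator interface and the allocator-type constants above. Any further implementation only needs the two interface methods; a minimal hypothetical example (e.g. a no-op allocator for tests), written as if it lived in the same package:

package node

import "k8s.io/kubernetes/pkg/api/v1"

// noopCIDRAllocator is a hypothetical allocator that ignores all nodes; it
// exists only to show how small the CIDRAllocator surface is.
type noopCIDRAllocator struct{}

func (noopCIDRAllocator) AllocateOrOccupyCIDR(node *v1.Node) error { return nil }
func (noopCIDRAllocator) ReleaseCIDR(node *v1.Node) error          { return nil }

// Compile-time check that the interface is satisfied.
var _ CIDRAllocator = noopCIDRAllocator{}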
							
								
								
									
pkg/controller/node/cloud_cidr_allocator.go (new file, 143 lines)
							| @@ -0,0 +1,143 @@ | |||||||
|  | /* | ||||||
|  | Copyright 2016 The Kubernetes Authors. | ||||||
|  |  | ||||||
|  | Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | you may not use this file except in compliance with the License. | ||||||
|  | You may obtain a copy of the License at | ||||||
|  |  | ||||||
|  |     http://www.apache.org/licenses/LICENSE-2.0 | ||||||
|  |  | ||||||
|  | Unless required by applicable law or agreed to in writing, software | ||||||
|  | distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | See the License for the specific language governing permissions and | ||||||
|  | limitations under the License. | ||||||
|  | */ | ||||||
|  |  | ||||||
|  | package node | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"fmt" | ||||||
|  | 	"sync" | ||||||
|  |  | ||||||
|  | 	"github.com/golang/glog" | ||||||
|  |  | ||||||
|  | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||||
|  | 	"k8s.io/apimachinery/pkg/types" | ||||||
|  |  | ||||||
|  | 	clientv1 "k8s.io/client-go/pkg/api/v1" | ||||||
|  | 	"k8s.io/client-go/tools/record" | ||||||
|  |  | ||||||
|  | 	"k8s.io/kubernetes/pkg/api" | ||||||
|  | 	"k8s.io/kubernetes/pkg/api/v1" | ||||||
|  | 	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset" | ||||||
|  | 	"k8s.io/kubernetes/pkg/cloudprovider" | ||||||
|  | 	"k8s.io/kubernetes/pkg/cloudprovider/providers/gce" | ||||||
|  | 	nodeutil "k8s.io/kubernetes/pkg/util/node" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | // cloudCIDRAllocator allocates node CIDRs according to IP address aliases | ||||||
|  | // assigned by the cloud provider. In this case, the allocation and | ||||||
|  | // deallocation is delegated to the external provider, and the controller | ||||||
|  | // merely takes the assignment and updates the node spec. | ||||||
|  | type cloudCIDRAllocator struct { | ||||||
|  | 	lock sync.Mutex | ||||||
|  |  | ||||||
|  | 	client clientset.Interface | ||||||
|  | 	cloud  *gce.GCECloud | ||||||
|  |  | ||||||
|  | 	recorder record.EventRecorder | ||||||
|  | } | ||||||
|  |  | ||||||
|  | var _ CIDRAllocator = (*cloudCIDRAllocator)(nil) | ||||||
|  |  | ||||||
|  | func NewCloudCIDRAllocator( | ||||||
|  | 	client clientset.Interface, | ||||||
|  | 	cloud cloudprovider.Interface) (ca CIDRAllocator, err error) { | ||||||
|  |  | ||||||
|  | 	gceCloud, ok := cloud.(*gce.GCECloud) | ||||||
|  | 	if !ok { | ||||||
|  | 		err = fmt.Errorf("cloudCIDRAllocator does not support %v provider", cloud.ProviderName()) | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	ca = &cloudCIDRAllocator{ | ||||||
|  | 		client: client, | ||||||
|  | 		cloud:  gceCloud, | ||||||
|  | 		recorder: record.NewBroadcaster().NewRecorder( | ||||||
|  | 			api.Scheme, | ||||||
|  | 			clientv1.EventSource{Component: "cidrAllocator"}), | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	glog.V(0).Infof("Using cloud CIDR allocator (provider: %v)", cloud.ProviderName()) | ||||||
|  |  | ||||||
|  | 	return | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (ca *cloudCIDRAllocator) AllocateOrOccupyCIDR(node *v1.Node) error { | ||||||
|  | 	glog.V(2).Infof("Updating PodCIDR for node %v", node.Name) | ||||||
|  |  | ||||||
|  | 	cidrs, err := ca.cloud.AliasRanges(types.NodeName(node.Name)) | ||||||
|  |  | ||||||
|  | 	if err != nil { | ||||||
|  | 		recordNodeStatusChange(ca.recorder, node, "CIDRNotAvailable") | ||||||
|  | 		return fmt.Errorf("failed to allocate cidr: %v", err) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if len(cidrs) == 0 { | ||||||
|  | 		recordNodeStatusChange(ca.recorder, node, "CIDRNotAvailable") | ||||||
|  | 		glog.V(2).Infof("Node %v has no CIDRs", node.Name) | ||||||
|  | 		return fmt.Errorf("failed to allocate cidr (none exist)") | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	node, err = ca.client.Core().Nodes().Get(node.Name, metav1.GetOptions{}) | ||||||
|  | 	if err != nil { | ||||||
|  | 		glog.Errorf("Could not get Node object from Kubernetes: %v", err) | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	podCIDR := cidrs[0] | ||||||
|  |  | ||||||
|  | 	if node.Spec.PodCIDR != "" { | ||||||
|  | 		if node.Spec.PodCIDR == podCIDR { | ||||||
|  | 			glog.V(3).Infof("Node %v has PodCIDR %v", node.Name, podCIDR) | ||||||
|  | 			return nil | ||||||
|  | 		} | ||||||
|  | 		glog.Errorf("PodCIDR cannot be reassigned, node %v spec has %v, but cloud provider has assigned %v", | ||||||
|  | 			node.Name, node.Spec.PodCIDR, podCIDR) | ||||||
|  | 		// We fall through and set the CIDR despite this error. This | ||||||
|  | 		// implements the same logic as implemented in the | ||||||
|  | 		// rangeAllocator. | ||||||
|  | 		// | ||||||
|  | 		// See https://github.com/kubernetes/kubernetes/pull/42147#discussion_r103357248 | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	node.Spec.PodCIDR = cidrs[0] | ||||||
|  | 	if _, err := ca.client.Core().Nodes().Update(node); err == nil { | ||||||
|  | 		glog.V(2).Infof("Node %v PodCIDR set to %v", node.Name, podCIDR) | ||||||
|  | 	} else { | ||||||
|  | 		glog.Errorf("Could not update node %v PodCIDR to %v: %v", | ||||||
|  | 			node.Name, podCIDR, err) | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	err = nodeutil.SetNodeCondition(ca.client, types.NodeName(node.Name), v1.NodeCondition{ | ||||||
|  | 		Type:               v1.NodeNetworkUnavailable, | ||||||
|  | 		Status:             v1.ConditionFalse, | ||||||
|  | 		Reason:             "RouteCreated", | ||||||
|  | 		Message:            "NodeController create implicit route", | ||||||
|  | 		LastTransitionTime: metav1.Now(), | ||||||
|  | 	}) | ||||||
|  | 	if err != nil { | ||||||
|  | 		glog.Errorf("Error setting route status for node %v: %v", | ||||||
|  | 			node.Name, err) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return err | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (ca *cloudCIDRAllocator) ReleaseCIDR(node *v1.Node) error { | ||||||
|  | 	glog.V(2).Infof("Node %v PodCIDR (%v) will be released by external cloud provider (not managed by controller)", | ||||||
|  | 		node.Name, node.Spec.PodCIDR) | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
| @@ -109,6 +109,8 @@ type nodeStatusData struct { | |||||||
|  |  | ||||||
| type NodeController struct { | type NodeController struct { | ||||||
| 	allocateNodeCIDRs bool | 	allocateNodeCIDRs bool | ||||||
|  | 	allocatorType     CIDRAllocatorType | ||||||
|  |  | ||||||
| 	cloud        cloudprovider.Interface | 	cloud        cloudprovider.Interface | ||||||
| 	clusterCIDR  *net.IPNet | 	clusterCIDR  *net.IPNet | ||||||
| 	serviceCIDR  *net.IPNet | 	serviceCIDR  *net.IPNet | ||||||
| @@ -162,9 +164,8 @@ type NodeController struct { | |||||||
|  |  | ||||||
| 	podInformerSynced cache.InformerSynced | 	podInformerSynced cache.InformerSynced | ||||||
|  |  | ||||||
| 	// allocate/recycle CIDRs for node if allocateNodeCIDRs == true |  | ||||||
| 	cidrAllocator CIDRAllocator | 	cidrAllocator CIDRAllocator | ||||||
| 	// manages taints |  | ||||||
| 	taintManager *NoExecuteTaintManager | 	taintManager *NoExecuteTaintManager | ||||||
|  |  | ||||||
| 	forcefullyDeletePod        func(*v1.Pod) error | 	forcefullyDeletePod        func(*v1.Pod) error | ||||||
| @@ -210,6 +211,7 @@ func NewNodeController( | |||||||
| 	serviceCIDR *net.IPNet, | 	serviceCIDR *net.IPNet, | ||||||
| 	nodeCIDRMaskSize int, | 	nodeCIDRMaskSize int, | ||||||
| 	allocateNodeCIDRs bool, | 	allocateNodeCIDRs bool, | ||||||
|  | 	allocatorType CIDRAllocatorType, | ||||||
| 	runTaintManager bool, | 	runTaintManager bool, | ||||||
| 	useTaintBasedEvictions bool) (*NodeController, error) { | 	useTaintBasedEvictions bool) (*NodeController, error) { | ||||||
| 	eventBroadcaster := record.NewBroadcaster() | 	eventBroadcaster := record.NewBroadcaster() | ||||||
| @@ -254,6 +256,7 @@ func NewNodeController( | |||||||
| 		clusterCIDR:                     clusterCIDR, | 		clusterCIDR:                     clusterCIDR, | ||||||
| 		serviceCIDR:                     serviceCIDR, | 		serviceCIDR:                     serviceCIDR, | ||||||
| 		allocateNodeCIDRs:               allocateNodeCIDRs, | 		allocateNodeCIDRs:               allocateNodeCIDRs, | ||||||
|  | 		allocatorType:                   allocatorType, | ||||||
| 		forcefullyDeletePod:             func(p *v1.Pod) error { return forcefullyDeletePod(kubeClient, p) }, | 		forcefullyDeletePod:             func(p *v1.Pod) error { return forcefullyDeletePod(kubeClient, p) }, | ||||||
| 		nodeExistsInCloudProvider:       func(nodeName types.NodeName) (bool, error) { return nodeExistsInCloudProvider(cloud, nodeName) }, | 		nodeExistsInCloudProvider:       func(nodeName types.NodeName) (bool, error) { return nodeExistsInCloudProvider(cloud, nodeName) }, | ||||||
| 		evictionLimiterQPS:              evictionLimiterQPS, | 		evictionLimiterQPS:              evictionLimiterQPS, | ||||||
| @@ -309,7 +312,6 @@ func NewNodeController( | |||||||
| 	}) | 	}) | ||||||
| 	nc.podInformerSynced = podInformer.Informer().HasSynced | 	nc.podInformerSynced = podInformer.Informer().HasSynced | ||||||
|  |  | ||||||
| 	nodeEventHandlerFuncs := cache.ResourceEventHandlerFuncs{} |  | ||||||
| 	if nc.allocateNodeCIDRs { | 	if nc.allocateNodeCIDRs { | ||||||
| 		var nodeList *v1.NodeList | 		var nodeList *v1.NodeList | ||||||
| 		var err error | 		var err error | ||||||
| @@ -328,147 +330,32 @@ func NewNodeController( | |||||||
| 		}); pollErr != nil { | 		}); pollErr != nil { | ||||||
| 			return nil, fmt.Errorf("Failed to list all nodes in %v, cannot proceed without updating CIDR map", apiserverStartupGracePeriod) | 			return nil, fmt.Errorf("Failed to list all nodes in %v, cannot proceed without updating CIDR map", apiserverStartupGracePeriod) | ||||||
| 		} | 		} | ||||||
| 		nc.cidrAllocator, err = NewCIDRRangeAllocator(kubeClient, clusterCIDR, serviceCIDR, nodeCIDRMaskSize, nodeList) |  | ||||||
|  | 		switch nc.allocatorType { | ||||||
|  | 		case RangeAllocatorType: | ||||||
|  | 			nc.cidrAllocator, err = NewCIDRRangeAllocator( | ||||||
|  | 				kubeClient, clusterCIDR, serviceCIDR, nodeCIDRMaskSize, nodeList) | ||||||
|  | 		case CloudAllocatorType: | ||||||
|  | 			nc.cidrAllocator, err = NewCloudCIDRAllocator(kubeClient, cloud) | ||||||
|  | 		default: | ||||||
|  | 			return nil, fmt.Errorf("Invalid CIDR allocator type: %v", nc.allocatorType) | ||||||
|  | 		} | ||||||
|  |  | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return nil, err | 			return nil, err | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		nodeEventHandlerFuncs = cache.ResourceEventHandlerFuncs{ | 		nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ | ||||||
| 			AddFunc: func(originalObj interface{}) { | 			AddFunc:    nc.onNodeAdd, | ||||||
| 				obj, err := api.Scheme.DeepCopy(originalObj) | 			UpdateFunc: nc.onNodeUpdate, | ||||||
| 				if err != nil { | 			DeleteFunc: nc.onNodeDelete, | ||||||
| 					utilruntime.HandleError(err) | 		}) | ||||||
| 					return |  | ||||||
| 				} |  | ||||||
| 				node := obj.(*v1.Node) |  | ||||||
|  |  | ||||||
| 				if err := nc.cidrAllocator.AllocateOrOccupyCIDR(node); err != nil { |  | ||||||
| 					utilruntime.HandleError(fmt.Errorf("Error allocating CIDR: %v", err)) |  | ||||||
| 				} |  | ||||||
| 				if nc.taintManager != nil { |  | ||||||
| 					nc.taintManager.NodeUpdated(nil, node) |  | ||||||
| 				} |  | ||||||
| 			}, |  | ||||||
| 			UpdateFunc: func(oldNode, newNode interface{}) { |  | ||||||
| 				node := newNode.(*v1.Node) |  | ||||||
| 				prevNode := oldNode.(*v1.Node) |  | ||||||
| 				// If the PodCIDR is not empty we either: |  | ||||||
| 				// - already processed a Node that already had a CIDR after NC restarted |  | ||||||
| 				//   (cidr is marked as used), |  | ||||||
| 				// - already processed a Node successfully and allocated a CIDR for it |  | ||||||
| 				//   (cidr is marked as used), |  | ||||||
| 				// - already processed a Node but we did saw a "timeout" response and |  | ||||||
| 				//   request eventually got through in this case we haven't released |  | ||||||
| 				//   the allocated CIDR (cidr is still marked as used). |  | ||||||
| 				// There's a possible error here: |  | ||||||
| 				// - NC sees a new Node and assigns a CIDR X to it, |  | ||||||
| 				// - Update Node call fails with a timeout, |  | ||||||
| 				// - Node is updated by some other component, NC sees an update and |  | ||||||
| 				//   assigns CIDR Y to the Node, |  | ||||||
| 				// - Both CIDR X and CIDR Y are marked as used in the local cache, |  | ||||||
| 				//   even though Node sees only CIDR Y |  | ||||||
| 				// The problem here is that in in-memory cache we see CIDR X as marked, |  | ||||||
| 				// which prevents it from being assigned to any new node. The cluster |  | ||||||
| 				// state is correct. |  | ||||||
| 				// Restart of NC fixes the issue. |  | ||||||
| 				if node.Spec.PodCIDR == "" { |  | ||||||
| 					nodeCopy, err := api.Scheme.Copy(node) |  | ||||||
| 					if err != nil { |  | ||||||
| 						utilruntime.HandleError(err) |  | ||||||
| 						return |  | ||||||
| 					} |  | ||||||
|  |  | ||||||
| 					if err := nc.cidrAllocator.AllocateOrOccupyCIDR(nodeCopy.(*v1.Node)); err != nil { |  | ||||||
| 						utilruntime.HandleError(fmt.Errorf("Error allocating CIDR: %v", err)) |  | ||||||
| 					} |  | ||||||
| 				} |  | ||||||
| 				if nc.taintManager != nil { |  | ||||||
| 					nc.taintManager.NodeUpdated(prevNode, node) |  | ||||||
| 				} |  | ||||||
| 			}, |  | ||||||
| 			DeleteFunc: func(originalObj interface{}) { |  | ||||||
| 				obj, err := api.Scheme.DeepCopy(originalObj) |  | ||||||
| 				if err != nil { |  | ||||||
| 					utilruntime.HandleError(err) |  | ||||||
| 					return |  | ||||||
| 				} |  | ||||||
|  |  | ||||||
| 				node, isNode := obj.(*v1.Node) |  | ||||||
| 				// We can get DeletedFinalStateUnknown instead of *v1.Node here and we need to handle that correctly. #34692 |  | ||||||
| 				if !isNode { |  | ||||||
| 					deletedState, ok := obj.(cache.DeletedFinalStateUnknown) |  | ||||||
| 					if !ok { |  | ||||||
| 						glog.Errorf("Received unexpected object: %v", obj) |  | ||||||
| 						return |  | ||||||
| 					} |  | ||||||
| 					node, ok = deletedState.Obj.(*v1.Node) |  | ||||||
| 					if !ok { |  | ||||||
| 						glog.Errorf("DeletedFinalStateUnknown contained non-Node object: %v", deletedState.Obj) |  | ||||||
| 						return |  | ||||||
| 					} |  | ||||||
| 				} |  | ||||||
| 				if nc.taintManager != nil { |  | ||||||
| 					nc.taintManager.NodeUpdated(node, nil) |  | ||||||
| 				} |  | ||||||
| 				if err := nc.cidrAllocator.ReleaseCIDR(node); err != nil { |  | ||||||
| 					glog.Errorf("Error releasing CIDR: %v", err) |  | ||||||
| 				} |  | ||||||
| 			}, |  | ||||||
| 		} |  | ||||||
| 	} else { |  | ||||||
| 		nodeEventHandlerFuncs = cache.ResourceEventHandlerFuncs{ |  | ||||||
| 			AddFunc: func(originalObj interface{}) { |  | ||||||
| 				obj, err := api.Scheme.DeepCopy(originalObj) |  | ||||||
| 				if err != nil { |  | ||||||
| 					utilruntime.HandleError(err) |  | ||||||
| 					return |  | ||||||
| 				} |  | ||||||
| 				node := obj.(*v1.Node) |  | ||||||
| 				if nc.taintManager != nil { |  | ||||||
| 					nc.taintManager.NodeUpdated(nil, node) |  | ||||||
| 				} |  | ||||||
| 			}, |  | ||||||
| 			UpdateFunc: func(oldNode, newNode interface{}) { |  | ||||||
| 				node := newNode.(*v1.Node) |  | ||||||
| 				prevNode := oldNode.(*v1.Node) |  | ||||||
| 				if nc.taintManager != nil { |  | ||||||
| 					nc.taintManager.NodeUpdated(prevNode, node) |  | ||||||
|  |  | ||||||
| 				} |  | ||||||
| 			}, |  | ||||||
| 			DeleteFunc: func(originalObj interface{}) { |  | ||||||
| 				obj, err := api.Scheme.DeepCopy(originalObj) |  | ||||||
| 				if err != nil { |  | ||||||
| 					utilruntime.HandleError(err) |  | ||||||
| 					return |  | ||||||
| 				} |  | ||||||
|  |  | ||||||
| 				node, isNode := obj.(*v1.Node) |  | ||||||
| 				// We can get DeletedFinalStateUnknown instead of *v1.Node here and we need to handle that correctly. #34692 |  | ||||||
| 				if !isNode { |  | ||||||
| 					deletedState, ok := obj.(cache.DeletedFinalStateUnknown) |  | ||||||
| 					if !ok { |  | ||||||
| 						glog.Errorf("Received unexpected object: %v", obj) |  | ||||||
| 						return |  | ||||||
| 					} |  | ||||||
| 					node, ok = deletedState.Obj.(*v1.Node) |  | ||||||
| 					if !ok { |  | ||||||
| 						glog.Errorf("DeletedFinalStateUnknown contained non-Node object: %v", deletedState.Obj) |  | ||||||
| 						return |  | ||||||
| 					} |  | ||||||
| 				} |  | ||||||
| 				if nc.taintManager != nil { |  | ||||||
| 					nc.taintManager.NodeUpdated(node, nil) |  | ||||||
| 				} |  | ||||||
| 			}, |  | ||||||
| 		} |  | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	if nc.runTaintManager { | 	if nc.runTaintManager { | ||||||
| 		nc.taintManager = NewNoExecuteTaintManager(kubeClient) | 		nc.taintManager = NewNoExecuteTaintManager(kubeClient) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	nodeInformer.Informer().AddEventHandler(nodeEventHandlerFuncs) |  | ||||||
| 	nc.nodeLister = nodeInformer.Lister() | 	nc.nodeLister = nodeInformer.Lister() | ||||||
| 	nc.nodeInformerSynced = nodeInformer.Informer().HasSynced | 	nc.nodeInformerSynced = nodeInformer.Informer().HasSynced | ||||||
|  |  | ||||||
| @@ -546,6 +433,90 @@ func (nc *NodeController) doTaintingPass() { | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (nc *NodeController) onNodeAdd(originalObj interface{}) { | ||||||
|  | 	obj, err := api.Scheme.DeepCopy(originalObj) | ||||||
|  | 	if err != nil { | ||||||
|  | 		utilruntime.HandleError(err) | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  | 	node := obj.(*v1.Node) | ||||||
|  |  | ||||||
|  | 	if err := nc.cidrAllocator.AllocateOrOccupyCIDR(node); err != nil { | ||||||
|  | 		utilruntime.HandleError(fmt.Errorf("Error allocating CIDR: %v", err)) | ||||||
|  | 	} | ||||||
|  | 	if nc.taintManager != nil { | ||||||
|  | 		nc.taintManager.NodeUpdated(nil, node) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (nc *NodeController) onNodeUpdate(oldNode, newNode interface{}) { | ||||||
|  | 	node := newNode.(*v1.Node) | ||||||
|  | 	prevNode := oldNode.(*v1.Node) | ||||||
|  | 	// If the PodCIDR is not empty we either: | ||||||
|  | 	// - already processed a Node that already had a CIDR after NC restarted | ||||||
|  | 	//   (cidr is marked as used), | ||||||
|  | 	// - already processed a Node successfully and allocated a CIDR for it | ||||||
|  | 	//   (cidr is marked as used), | ||||||
|  | 	// - already processed a Node but saw a "timeout" response and the | ||||||
|  | 	//   request eventually got through; in this case we haven't released | ||||||
|  | 	//   the allocated CIDR (cidr is still marked as used). | ||||||
|  | 	// There's a possible error here: | ||||||
|  | 	// - NC sees a new Node and assigns a CIDR X to it, | ||||||
|  | 	// - Update Node call fails with a timeout, | ||||||
|  | 	// - Node is updated by some other component, NC sees an update and | ||||||
|  | 	//   assigns CIDR Y to the Node, | ||||||
|  | 	// - Both CIDR X and CIDR Y are marked as used in the local cache, | ||||||
|  | 	//   even though Node sees only CIDR Y | ||||||
|  | 	// The problem here is that in in-memory cache we see CIDR X as marked, | ||||||
|  | 	// which prevents it from being assigned to any new node. The cluster | ||||||
|  | 	// state is correct. | ||||||
|  | 	// Restart of NC fixes the issue. | ||||||
|  | 	if node.Spec.PodCIDR == "" { | ||||||
|  | 		nodeCopy, err := api.Scheme.Copy(node) | ||||||
|  | 		if err != nil { | ||||||
|  | 			utilruntime.HandleError(err) | ||||||
|  | 			return | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		if err := nc.cidrAllocator.AllocateOrOccupyCIDR(nodeCopy.(*v1.Node)); err != nil { | ||||||
|  | 			utilruntime.HandleError(fmt.Errorf("Error allocating CIDR: %v", err)) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	if nc.taintManager != nil { | ||||||
|  | 		nc.taintManager.NodeUpdated(prevNode, node) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (nc *NodeController) onNodeDelete(originalObj interface{}) { | ||||||
|  | 	obj, err := api.Scheme.DeepCopy(originalObj) | ||||||
|  | 	if err != nil { | ||||||
|  | 		utilruntime.HandleError(err) | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	node, isNode := obj.(*v1.Node) | ||||||
|  | 	// We can get DeletedFinalStateUnknown instead of *v1.Node here and | ||||||
|  | 	// we need to handle that correctly. #34692 | ||||||
|  | 	if !isNode { | ||||||
|  | 		deletedState, ok := obj.(cache.DeletedFinalStateUnknown) | ||||||
|  | 		if !ok { | ||||||
|  | 			glog.Errorf("Received unexpected object: %v", obj) | ||||||
|  | 			return | ||||||
|  | 		} | ||||||
|  | 		node, ok = deletedState.Obj.(*v1.Node) | ||||||
|  | 		if !ok { | ||||||
|  | 			glog.Errorf("DeletedFinalStateUnknown contained non-Node object: %v", deletedState.Obj) | ||||||
|  | 			return | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	if nc.taintManager != nil { | ||||||
|  | 		nc.taintManager.NodeUpdated(node, nil) | ||||||
|  | 	} | ||||||
|  | 	if err := nc.cidrAllocator.ReleaseCIDR(node); err != nil { | ||||||
|  | 		glog.Errorf("Error releasing CIDR: %v", err) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
| // Run starts an asynchronous loop that monitors the status of cluster nodes. | // Run starts an asynchronous loop that monitors the status of cluster nodes. | ||||||
| func (nc *NodeController) Run() { | func (nc *NodeController) Run() { | ||||||
| 	go func() { | 	go func() { | ||||||
|   | |||||||
| @@ -101,6 +101,7 @@ func NewNodeControllerFromClient( | |||||||
| 		serviceCIDR, | 		serviceCIDR, | ||||||
| 		nodeCIDRMaskSize, | 		nodeCIDRMaskSize, | ||||||
| 		allocateNodeCIDRs, | 		allocateNodeCIDRs, | ||||||
|  | 		RangeAllocatorType, | ||||||
| 		useTaints, | 		useTaints, | ||||||
| 		useTaints, | 		useTaints, | ||||||
| 	) | 	) | ||||||
| @@ -549,9 +550,22 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { | |||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	for _, item := range table { | 	for _, item := range table { | ||||||
| 		nodeController, _ := NewNodeControllerFromClient(nil, item.fakeNodeHandler, | 		nodeController, _ := NewNodeControllerFromClient( | ||||||
| 			evictionTimeout, testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold, testNodeMonitorGracePeriod, | 			nil, | ||||||
| 			testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false, false) | 			item.fakeNodeHandler, | ||||||
|  | 			evictionTimeout, | ||||||
|  | 			testRateLimiterQPS, | ||||||
|  | 			testRateLimiterQPS, | ||||||
|  | 			testLargeClusterThreshold, | ||||||
|  | 			testUnhealtyThreshold, | ||||||
|  | 			testNodeMonitorGracePeriod, | ||||||
|  | 			testNodeStartupGracePeriod, | ||||||
|  | 			testNodeMonitorPeriod, | ||||||
|  | 			nil, | ||||||
|  | 			nil, | ||||||
|  | 			0, | ||||||
|  | 			false, | ||||||
|  | 			false) | ||||||
| 		nodeController.now = func() metav1.Time { return fakeNow } | 		nodeController.now = func() metav1.Time { return fakeNow } | ||||||
| 		nodeController.recorder = testutil.NewFakeRecorder() | 		nodeController.recorder = testutil.NewFakeRecorder() | ||||||
| 		for _, ds := range item.daemonSets { | 		for _, ds := range item.daemonSets { | ||||||
|   | |||||||
							
								
								
									
pkg/controller/node/range_allocator.go (new file, 262 lines)
							| @@ -0,0 +1,262 @@ | |||||||
|  | /* | ||||||
|  | Copyright 2016 The Kubernetes Authors. | ||||||
|  |  | ||||||
|  | Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | you may not use this file except in compliance with the License. | ||||||
|  | You may obtain a copy of the License at | ||||||
|  |  | ||||||
|  |     http://www.apache.org/licenses/LICENSE-2.0 | ||||||
|  |  | ||||||
|  | Unless required by applicable law or agreed to in writing, software | ||||||
|  | distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | See the License for the specific language governing permissions and | ||||||
|  | limitations under the License. | ||||||
|  | */ | ||||||
|  |  | ||||||
|  | package node | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"fmt" | ||||||
|  | 	"net" | ||||||
|  | 	"sync" | ||||||
|  |  | ||||||
|  | 	apierrors "k8s.io/apimachinery/pkg/api/errors" | ||||||
|  | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||||
|  | 	"k8s.io/apimachinery/pkg/util/sets" | ||||||
|  | 	"k8s.io/apimachinery/pkg/util/wait" | ||||||
|  | 	v1core "k8s.io/client-go/kubernetes/typed/core/v1" | ||||||
|  | 	clientv1 "k8s.io/client-go/pkg/api/v1" | ||||||
|  | 	"k8s.io/client-go/tools/record" | ||||||
|  | 	"k8s.io/kubernetes/pkg/api" | ||||||
|  | 	"k8s.io/kubernetes/pkg/api/v1" | ||||||
|  | 	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset" | ||||||
|  |  | ||||||
|  | 	"github.com/golang/glog" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | // TODO: figure out the good setting for those constants. | ||||||
|  | const ( | ||||||
|  | 	// controls how many NodeSpec updates NC can process concurrently. | ||||||
|  | 	cidrUpdateWorkers   = 10 | ||||||
|  | 	cidrUpdateQueueSize = 5000 | ||||||
|  | 	// podCIDRUpdateRetry controls the number of retries of writing Node.Spec.PodCIDR update. | ||||||
|  | 	podCIDRUpdateRetry = 5 | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | type rangeAllocator struct { | ||||||
|  | 	client      clientset.Interface | ||||||
|  | 	cidrs       *cidrSet | ||||||
|  | 	clusterCIDR *net.IPNet | ||||||
|  | 	maxCIDRs    int | ||||||
|  | 	// Channel that is used to pass updating Nodes with assigned CIDRs to the background | ||||||
|  | 	// This increases the throughput of CIDR assignment by not blocking on long operations. | ||||||
|  | 	nodeCIDRUpdateChannel chan nodeAndCIDR | ||||||
|  | 	recorder              record.EventRecorder | ||||||
|  | 	// Keep a set of nodes that are currently being processed to avoid races in CIDR allocation | ||||||
|  | 	sync.Mutex | ||||||
|  | 	nodesInProcessing sets.String | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // NewCIDRRangeAllocator returns a CIDRAllocator to allocate CIDR for node | ||||||
|  | // Caller must ensure subNetMaskSize is not less than cluster CIDR mask size. | ||||||
|  | // Caller must always pass in a list of existing nodes so the new allocator | ||||||
|  | // can initialize its CIDR map. NodeList is only nil in testing. | ||||||
|  | func NewCIDRRangeAllocator(client clientset.Interface, clusterCIDR *net.IPNet, serviceCIDR *net.IPNet, subNetMaskSize int, nodeList *v1.NodeList) (CIDRAllocator, error) { | ||||||
|  | 	eventBroadcaster := record.NewBroadcaster() | ||||||
|  | 	recorder := eventBroadcaster.NewRecorder(api.Scheme, clientv1.EventSource{Component: "cidrAllocator"}) | ||||||
|  | 	eventBroadcaster.StartLogging(glog.Infof) | ||||||
|  | 	if client != nil { | ||||||
|  | 		glog.V(0).Infof("Sending events to api server.") | ||||||
|  | 		eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(client.Core().RESTClient()).Events("")}) | ||||||
|  | 	} else { | ||||||
|  | 		glog.Fatalf("kubeClient is nil when starting NodeController") | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	ra := &rangeAllocator{ | ||||||
|  | 		client:                client, | ||||||
|  | 		cidrs:                 newCIDRSet(clusterCIDR, subNetMaskSize), | ||||||
|  | 		clusterCIDR:           clusterCIDR, | ||||||
|  | 		nodeCIDRUpdateChannel: make(chan nodeAndCIDR, cidrUpdateQueueSize), | ||||||
|  | 		recorder:              recorder, | ||||||
|  | 		nodesInProcessing:     sets.NewString(), | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if serviceCIDR != nil { | ||||||
|  | 		ra.filterOutServiceRange(serviceCIDR) | ||||||
|  | 	} else { | ||||||
|  | 		glog.V(0).Info("No Service CIDR provided. Skipping filtering out service addresses.") | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if nodeList != nil { | ||||||
|  | 		for _, node := range nodeList.Items { | ||||||
|  | 			if node.Spec.PodCIDR == "" { | ||||||
|  | 				glog.Infof("Node %v has no CIDR, ignoring", node.Name) | ||||||
|  | 				continue | ||||||
|  | 			} else { | ||||||
|  | 				glog.Infof("Node %v has CIDR %s, occupying it in CIDR map", | ||||||
|  | 					node.Name, node.Spec.PodCIDR) | ||||||
|  | 			} | ||||||
|  | 			if err := ra.occupyCIDR(&node); err != nil { | ||||||
|  | 				// This will happen if: | ||||||
|  | 				// 1. We find garbage in the podCIDR field. Retrying is useless. | ||||||
|  | 				// 2. CIDR out of range: This means a node CIDR has changed. | ||||||
|  | 				// This error will keep crashing controller-manager. | ||||||
|  | 				return nil, err | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	for i := 0; i < cidrUpdateWorkers; i++ { | ||||||
|  | 		go func(stopChan <-chan struct{}) { | ||||||
|  | 			for { | ||||||
|  | 				select { | ||||||
|  | 				case workItem, ok := <-ra.nodeCIDRUpdateChannel: | ||||||
|  | 					if !ok { | ||||||
|  | 						glog.Warning("NodeCIDRUpdateChannel read returned false.") | ||||||
|  | 						return | ||||||
|  | 					} | ||||||
|  | 					ra.updateCIDRAllocation(workItem) | ||||||
|  | 				case <-stopChan: | ||||||
|  | 					return | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 		}(wait.NeverStop) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return ra, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (r *rangeAllocator) insertNodeToProcessing(nodeName string) bool { | ||||||
|  | 	r.Lock() | ||||||
|  | 	defer r.Unlock() | ||||||
|  | 	if r.nodesInProcessing.Has(nodeName) { | ||||||
|  | 		return false | ||||||
|  | 	} | ||||||
|  | 	r.nodesInProcessing.Insert(nodeName) | ||||||
|  | 	return true | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (r *rangeAllocator) removeNodeFromProcessing(nodeName string) { | ||||||
|  | 	r.Lock() | ||||||
|  | 	defer r.Unlock() | ||||||
|  | 	r.nodesInProcessing.Delete(nodeName) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (r *rangeAllocator) occupyCIDR(node *v1.Node) error { | ||||||
|  | 	defer r.removeNodeFromProcessing(node.Name) | ||||||
|  | 	if node.Spec.PodCIDR == "" { | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
|  | 	_, podCIDR, err := net.ParseCIDR(node.Spec.PodCIDR) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return fmt.Errorf("failed to parse CIDR %s on node %s: %v", node.Spec.PodCIDR, node.Name, err) | ||||||
|  | 	} | ||||||
|  | 	if err := r.cidrs.occupy(podCIDR); err != nil { | ||||||
|  | 		return fmt.Errorf("failed to mark cidr as occupied: %v", err) | ||||||
|  | 	} | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // WARNING: If you add any return paths to, or defer any more work from, this | ||||||
|  | // function, you must make sure nodesInProcessing is handled correctly. | ||||||
|  | func (r *rangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error { | ||||||
|  | 	if node == nil { | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
|  | 	if !r.insertNodeToProcessing(node.Name) { | ||||||
|  | 		glog.V(2).Infof("Node %v is already in the process of CIDR assignment.", node.Name) | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
|  | 	if node.Spec.PodCIDR != "" { | ||||||
|  | 		return r.occupyCIDR(node) | ||||||
|  | 	} | ||||||
|  | 	podCIDR, err := r.cidrs.allocateNext() | ||||||
|  | 	if err != nil { | ||||||
|  | 		r.removeNodeFromProcessing(node.Name) | ||||||
|  | 		recordNodeStatusChange(r.recorder, node, "CIDRNotAvailable") | ||||||
|  | 		return fmt.Errorf("failed to allocate cidr: %v", err) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	glog.V(10).Infof("Putting node %s with CIDR %s into the work queue", node.Name, podCIDR) | ||||||
|  | 	r.nodeCIDRUpdateChannel <- nodeAndCIDR{ | ||||||
|  | 		nodeName: node.Name, | ||||||
|  | 		cidr:     podCIDR, | ||||||
|  | 	} | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error { | ||||||
|  | 	if node == nil || node.Spec.PodCIDR == "" { | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
|  | 	_, podCIDR, err := net.ParseCIDR(node.Spec.PodCIDR) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return fmt.Errorf("Failed to parse CIDR %s on Node %v: %v", node.Spec.PodCIDR, node.Name, err) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	glog.V(4).Infof("release CIDR %s", node.Spec.PodCIDR) | ||||||
|  | 	if err = r.cidrs.release(podCIDR); err != nil { | ||||||
|  | 		return fmt.Errorf("Error when releasing CIDR %v: %v", node.Spec.PodCIDR, err) | ||||||
|  | 	} | ||||||
|  | 	return err | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // filterOutServiceRange marks all CIDRs with subNetMaskSize that belong to | ||||||
|  | // serviceCIDR as used, so that they won't be assignable. | ||||||
|  | func (r *rangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) { | ||||||
|  | 	// Checks if service CIDR has a nonempty intersection with cluster | ||||||
|  | 	// CIDR. It is the case if either clusterCIDR contains serviceCIDR with | ||||||
|  | 	// clusterCIDR's Mask applied (this means that clusterCIDR contains | ||||||
|  | 	// serviceCIDR) or vice versa (which means that serviceCIDR contains | ||||||
|  | 	// clusterCIDR). | ||||||
|  | 	if !r.clusterCIDR.Contains(serviceCIDR.IP.Mask(r.clusterCIDR.Mask)) && !serviceCIDR.Contains(r.clusterCIDR.IP.Mask(serviceCIDR.Mask)) { | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if err := r.cidrs.occupy(serviceCIDR); err != nil { | ||||||
|  | 		glog.Errorf("Error filtering out service cidr %v: %v", serviceCIDR, err) | ||||||
|  | 	} | ||||||
|  | } | ||||||
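
The intersection test above can be hard to parse at a glance. The sketch below restates it as a standalone helper with example ranges; the helper name and the sample CIDRs are illustrative only and do not appear in this commit.

	// cidrsIntersect mirrors the check in filterOutServiceRange: two CIDRs
	// overlap exactly when one of them, after masking its base address with
	// the other's mask, falls inside the other.
	func cidrsIntersect(a, b *net.IPNet) bool {
		return a.Contains(b.IP.Mask(a.Mask)) || b.Contains(a.IP.Mask(b.Mask))
	}

	// Example: with cluster 10.244.0.0/16 and service 10.244.240.0/20 the test
	// is true (the service range lies inside the cluster range and gets
	// occupied); with cluster 10.244.0.0/16 and service 10.96.0.0/12 it is
	// false and the service range is left alone.
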
|  |  | ||||||
|  | // Assigns CIDR to Node and sends an update to the API server. | ||||||
|  | func (r *rangeAllocator) updateCIDRAllocation(data nodeAndCIDR) error { | ||||||
|  | 	var err error | ||||||
|  | 	var node *v1.Node | ||||||
|  | 	defer r.removeNodeFromProcessing(data.nodeName) | ||||||
|  | 	for rep := 0; rep < podCIDRUpdateRetry; rep++ { | ||||||
|  | 		// TODO: change it to using PATCH instead of full Node updates. | ||||||
|  | 		node, err = r.client.Core().Nodes().Get(data.nodeName, metav1.GetOptions{}) | ||||||
|  | 		if err != nil { | ||||||
|  | 			glog.Errorf("Failed while getting node %v to retry updating Node.Spec.PodCIDR: %v", data.nodeName, err) | ||||||
|  | 			continue | ||||||
|  | 		} | ||||||
|  | 		if node.Spec.PodCIDR != "" { | ||||||
|  | 			glog.Errorf("Node %v already has CIDR %v allocated; releasing the newly assigned CIDR if it differs.", node.Name, node.Spec.PodCIDR) | ||||||
|  | 			if node.Spec.PodCIDR != data.cidr.String() { | ||||||
|  | 				if err := r.cidrs.release(data.cidr); err != nil { | ||||||
|  | 					glog.Errorf("Error when releasing CIDR %v: %v", data.cidr.String(), err) | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 			return nil | ||||||
|  | 		} | ||||||
|  | 		node.Spec.PodCIDR = data.cidr.String() | ||||||
|  | 		if _, err := r.client.Core().Nodes().Update(node); err != nil { | ||||||
|  | 			glog.Errorf("Failed while updating Node.Spec.PodCIDR (%d retries left): %v", podCIDRUpdateRetry-rep-1, err) | ||||||
|  | 		} else { | ||||||
|  | 			break | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	if err != nil { | ||||||
|  | 		recordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed") | ||||||
|  | 		// We accept the fact that we may leak CIDRs here. This is safer than releasing | ||||||
|  | 		// them when we don't know whether the update request went through. | ||||||
|  | 		// A NodeController restart will return any falsely allocated CIDRs to the pool. | ||||||
|  | 		if !apierrors.IsServerTimeout(err) { | ||||||
|  | 			glog.Errorf("CIDR assignment for node %v failed: %v. Releasing allocated CIDR", data.nodeName, err) | ||||||
|  | 			if releaseErr := r.cidrs.release(data.cidr); releaseErr != nil { | ||||||
|  | 				glog.Errorf("Error releasing allocated CIDR for node %v: %v", data.nodeName, releaseErr) | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return err | ||||||
|  | } | ||||||
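
The TODO above and the RBAC change below both point toward replacing the Get/Update retry loop with a PATCH. A rough sketch of what that might look like follows; it is an assumption about a possible follow-up, not code from this commit, and it presumes a strategic merge patch on spec.podCIDR is acceptable. The helper name and the extra import of k8s.io/apimachinery/pkg/types are the only additions it would need.

	// patchNodePodCIDR is a hypothetical alternative to the retry loop above:
	// it sends a strategic merge patch that touches only spec.podCIDR, so no
	// Get or resourceVersion round-trip is required.
	func (r *rangeAllocator) patchNodePodCIDR(nodeName, cidr string) error {
		patch := fmt.Sprintf(`{"spec":{"podCIDR":%q}}`, cidr)
		_, err := r.client.Core().Nodes().Patch(nodeName, types.StrategicMergePatchType, []byte(patch))
		return err
	}
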
| @@ -168,7 +168,7 @@ func init() { | |||||||
| 		ObjectMeta: metav1.ObjectMeta{Name: saRolePrefix + "node-controller"}, | 		ObjectMeta: metav1.ObjectMeta{Name: saRolePrefix + "node-controller"}, | ||||||
| 		Rules: []rbac.PolicyRule{ | 		Rules: []rbac.PolicyRule{ | ||||||
| 			rbac.NewRule("get", "list", "update", "delete", "patch").Groups(legacyGroup).Resources("nodes").RuleOrDie(), | 			rbac.NewRule("get", "list", "update", "delete", "patch").Groups(legacyGroup).Resources("nodes").RuleOrDie(), | ||||||
| 			rbac.NewRule("update").Groups(legacyGroup).Resources("nodes/status").RuleOrDie(), | 			rbac.NewRule("patch", "update").Groups(legacyGroup).Resources("nodes/status").RuleOrDie(), | ||||||
| 			// used for pod eviction | 			// used for pod eviction | ||||||
| 			rbac.NewRule("update").Groups(legacyGroup).Resources("pods/status").RuleOrDie(), | 			rbac.NewRule("update").Groups(legacyGroup).Resources("pods/status").RuleOrDie(), | ||||||
| 			rbac.NewRule("list", "delete").Groups(legacyGroup).Resources("pods").RuleOrDie(), | 			rbac.NewRule("list", "delete").Groups(legacyGroup).Resources("pods").RuleOrDie(), | ||||||
|   | |||||||
| @@ -561,6 +561,7 @@ items: | |||||||
|     resources: |     resources: | ||||||
|     - nodes/status |     - nodes/status | ||||||
|     verbs: |     verbs: | ||||||
|  |     - patch | ||||||
|     - update |     - update | ||||||
|   - apiGroups: |   - apiGroups: | ||||||
|     - "" |     - "" | ||||||
|   | |||||||
Bowei Du