mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-10-30 17:58:14 +00:00 
			
		
		
		
	adding kubelet flags for node allocatable phase 2
Signed-off-by: Vishnu Kannan <vishnuk@google.com>
This commit is contained in:
		 Vishnu Kannan
					Vishnu Kannan
				
			
				
					committed by
					
						 Vishnu kannan
						Vishnu kannan
					
				
			
			
				
	
			
			
			 Vishnu kannan
						Vishnu kannan
					
				
			
						parent
						
							9a1f0574a4
						
					
				
				
					commit
					70e340b045
				
			| @@ -225,8 +225,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) { | ||||
| 	fs.Float64Var(&s.ChaosChance, "chaos-chance", s.ChaosChance, "If > 0.0, introduce random client errors and latency. Intended for testing. [default=0.0]") | ||||
| 	fs.BoolVar(&s.Containerized, "containerized", s.Containerized, "Experimental support for running kubelet in a container.  Intended for testing. [default=false]") | ||||
| 	fs.Int64Var(&s.MaxOpenFiles, "max-open-files", s.MaxOpenFiles, "Number of files that can be opened by Kubelet process. [default=1000000]") | ||||
| 	fs.Var(&s.SystemReserved, "system-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]") | ||||
| 	fs.Var(&s.KubeReserved, "kube-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]") | ||||
|  | ||||
| 	fs.BoolVar(&s.RegisterSchedulable, "register-schedulable", s.RegisterSchedulable, "Register the node as schedulable. Won't have any effect if register-node is false. [default=true]") | ||||
| 	fs.MarkDeprecated("register-schedulable", "will be removed in a future version") | ||||
| 	fs.Var(utiltaints.NewTaintsVar(&s.RegisterWithTaints), "register-with-taints", "Register the node with the given list of taints (comma seperated \"<key>=<value>:<effect>\"). No-op if register-node is false.") | ||||
| @@ -264,4 +263,13 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) { | ||||
| 	fs.StringVar(&s.RemoteImageEndpoint, "image-service-endpoint", s.RemoteImageEndpoint, "[Experimental] The unix socket endpoint of remote image service. If not specified, it will be the same with container-runtime-endpoint by default. The endpoint is used only when CRI integration is enabled (--enable-cri)") | ||||
|  | ||||
| 	fs.BoolVar(&s.ExperimentalCheckNodeCapabilitiesBeforeMount, "experimental-check-node-capabilities-before-mount", s.ExperimentalCheckNodeCapabilitiesBeforeMount, "[Experimental] if set true, the kubelet will check the underlying node for required componenets (binaries, etc.) before performing the mount") | ||||
|  | ||||
| 	// Node Allocatable Flags | ||||
| 	fs.Var(&s.SystemReserved, "system-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]") | ||||
| 	fs.Var(&s.KubeReserved, "kube-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]") | ||||
|  | ||||
| 	fs.StringSliceVar(&s.EnforceNodeAllocatable, "enforce-node-allocatable", s.EnforceNodeAllocatable, "A comma separated list of levels of node allocatable enforcement to be enforced by kubelet. Acceptible options are 'pods', 'system-reserved' & 'kube-reserved'. If the latter two options are specified, '--system-reserved-cgroup' & '--kube-reserved-cgroup' must also be set respectively. See https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md for more details. [default='']") | ||||
| 	fs.StringVar(&s.SystemReservedCgroup, "system-reserved-cgroup", s.SystemReservedCgroup, "Absolute name of the top level cgroup that is used to manage non-kubernetes components for which compute resources were reserved via '--system-reserved' flag. Ex. '/system-reserved'. [default='']") | ||||
| 	fs.StringVar(&s.KubeReservedCgroup, "kube-reserved-cgroup", s.KubeReservedCgroup, "Absolute name of the top level cgroup that is used to manage kubernetes components for which compute resources were reserved via '--kube-reserved' flag. Ex. '/kube-reserved'. [default='']") | ||||
| 	fs.BoolVar(&s.ExperimentalNodeAllocatableIgnoreEvictionThreshold, "experimental-node-allocatable-ignore-eviction-threshold", s.ExperimentalNodeAllocatableIgnoreEvictionThreshold, "When set to 'true', Hard Eviction Thresholds will be ignored while calculating Node Allocatable. See https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md for more details. [default=false]") | ||||
| } | ||||
|   | ||||
| @@ -40,6 +40,7 @@ import ( | ||||
| 	"k8s.io/apimachinery/pkg/runtime" | ||||
| 	"k8s.io/apimachinery/pkg/types" | ||||
| 	utilruntime "k8s.io/apimachinery/pkg/util/runtime" | ||||
| 	"k8s.io/apimachinery/pkg/util/sets" | ||||
| 	"k8s.io/apimachinery/pkg/util/wait" | ||||
| 	"k8s.io/apiserver/pkg/server/healthz" | ||||
| 	utilfeature "k8s.io/apiserver/pkg/util/feature" | ||||
| @@ -469,6 +470,14 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) { | ||||
| 				CgroupDriver:          s.CgroupDriver, | ||||
| 				ProtectKernelDefaults: s.ProtectKernelDefaults, | ||||
| 				EnableCRI:             s.EnableCRI, | ||||
| 				NodeAllocatableConfig: cm.NodeAllocatableConfig{ | ||||
| 					KubeReservedCgroupName:   s.KubeReservedCgroup, | ||||
| 					SystemReservedCgroupName: s.SystemReservedCgroup, | ||||
| 					EnforceNodeAllocatable:   sets.NewString(s.EnforceNodeAllocatable...), | ||||
| 					KubeReserved:             kubeReserved, | ||||
| 					SystemReserved:           systemReserved, | ||||
| 					HardEvictionThresholds:   hardEvictionThresholds, | ||||
| 				}, | ||||
| 			}, | ||||
| 			s.ExperimentalFailSwapOn) | ||||
|  | ||||
|   | ||||
| @@ -442,16 +442,6 @@ type KubeletConfiguration struct { | ||||
| 	// manage attachment/detachment of volumes scheduled to this node, and | ||||
| 	// disables kubelet from executing any attach/detach operations | ||||
| 	EnableControllerAttachDetach bool | ||||
| 	// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs | ||||
| 	// that describe resources reserved for non-kubernetes components. | ||||
| 	// Currently only cpu and memory are supported. [default=none] | ||||
| 	// See http://kubernetes.io/docs/user-guide/compute-resources for more detail. | ||||
| 	SystemReserved ConfigurationMap | ||||
| 	// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs | ||||
| 	// that describe resources reserved for kubernetes system components. | ||||
| 	// Currently only cpu and memory are supported. [default=none] | ||||
| 	// See http://kubernetes.io/docs/user-guide/compute-resources for more detail. | ||||
| 	KubeReserved ConfigurationMap | ||||
| 	// Default behaviour for kernel tuning | ||||
| 	ProtectKernelDefaults bool | ||||
| 	// If true, Kubelet ensures a set of iptables rules are present on host. | ||||
| @@ -485,6 +475,32 @@ type KubeletConfiguration struct { | ||||
| 	// This flag, if set, instructs the kubelet to keep volumes from terminated pods mounted to the node. | ||||
| 	// This can be useful for debugging volume related issues. | ||||
| 	KeepTerminatedPodVolumes bool | ||||
|  | ||||
| 	/* following flags are meant for Node Allocatable */ | ||||
|  | ||||
| 	// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs | ||||
| 	// that describe resources reserved for non-kubernetes components. | ||||
| 	// Currently only cpu and memory are supported. [default=none] | ||||
| 	// See http://kubernetes.io/docs/user-guide/compute-resources for more detail. | ||||
| 	SystemReserved ConfigurationMap | ||||
| 	// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs | ||||
| 	// that describe resources reserved for kubernetes system components. | ||||
| 	// Currently only cpu and memory are supported. [default=none] | ||||
| 	// See http://kubernetes.io/docs/user-guide/compute-resources for more detail. | ||||
| 	KubeReserved ConfigurationMap | ||||
| 	// This flag helps kubelet identify absolute name of top level cgroup used to enforce `SystemReserved` compute resource reservation for OS system daemons. | ||||
| 	// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information. | ||||
| 	SystemReservedCgroup string | ||||
| 	// This flag helps kubelet identify absolute name of top level cgroup used to enforce `KubeReserved` compute resource reservation for Kubernetes node system daemons. | ||||
| 	// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information. | ||||
| 	KubeReservedCgroup string | ||||
| 	// This flag specifies the various Node Allocatable enforcements that Kubelet needs to perform. | ||||
| 	// This flag accepts a list of options. Acceptable options are `pods`, `system-reserved` & `kube-reserved`. | ||||
| 	// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information. | ||||
| 	EnforceNodeAllocatable []string | ||||
| 	// This flag, if set, will avoid including `EvictionHard` limits while computing Node Allocatable. | ||||
| 	// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information. | ||||
| 	ExperimentalNodeAllocatableIgnoreEvictionThreshold bool | ||||
| } | ||||
|  | ||||
| type KubeletAuthorizationMode string | ||||
|   | ||||
| @@ -48,7 +48,12 @@ const ( | ||||
| 	defaultIPTablesDropBit       = 15 | ||||
| ) | ||||
|  | ||||
| var zeroDuration = metav1.Duration{} | ||||
| var ( | ||||
| 	zeroDuration = metav1.Duration{} | ||||
| 	// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information. | ||||
| 	// TODO: Set the default to "pods" once cgroups per qos is turned on by default. | ||||
| 	defaultNodeAllocatableEnforcement = []string{} | ||||
| ) | ||||
|  | ||||
| func addDefaultingFuncs(scheme *kruntime.Scheme) error { | ||||
| 	RegisterDefaults(scheme) | ||||
| @@ -401,6 +406,9 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) { | ||||
| 	if obj.CgroupDriver == "" { | ||||
| 		obj.CgroupDriver = "cgroupfs" | ||||
| 	} | ||||
| 	if obj.EnforceNodeAllocatable == nil { | ||||
| 		obj.EnforceNodeAllocatable = defaultNodeAllocatableEnforcement | ||||
| 	} | ||||
| 	if obj.EnableCRI == nil { | ||||
| 		obj.EnableCRI = boolVar(true) | ||||
| 	} | ||||
|   | ||||
| @@ -478,16 +478,6 @@ type KubeletConfiguration struct { | ||||
| 	// manage attachment/detachment of volumes scheduled to this node, and | ||||
| 	// disables kubelet from executing any attach/detach operations | ||||
| 	EnableControllerAttachDetach *bool `json:"enableControllerAttachDetach"` | ||||
| 	// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs | ||||
| 	// that describe resources reserved for non-kubernetes components. | ||||
| 	// Currently only cpu and memory are supported. [default=none] | ||||
| 	// See http://kubernetes.io/docs/user-guide/compute-resources for more detail. | ||||
| 	SystemReserved map[string]string `json:"systemReserved"` | ||||
| 	// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs | ||||
| 	// that describe resources reserved for kubernetes system components. | ||||
| 	// Currently only cpu and memory are supported. [default=none] | ||||
| 	// See http://kubernetes.io/docs/user-guide/compute-resources for more detail. | ||||
| 	KubeReserved map[string]string `json:"kubeReserved"` | ||||
| 	// Default behaviour for kernel tuning | ||||
| 	ProtectKernelDefaults bool `json:"protectKernelDefaults"` | ||||
| 	// If true, Kubelet ensures a set of iptables rules are present on host. | ||||
| @@ -522,6 +512,33 @@ type KubeletConfiguration struct { | ||||
| 	// This flag, if set, instructs the kubelet to keep volumes from terminated pods mounted to the node. | ||||
| 	// This can be useful for debugging volume related issues. | ||||
| 	KeepTerminatedPodVolumes bool `json:"keepTerminatedPodVolumes,omitempty"` | ||||
|  | ||||
| 	/* following flags are meant for Node Allocatable */ | ||||
|  | ||||
| 	// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs | ||||
| 	// that describe resources reserved for non-kubernetes components. | ||||
| 	// Currently only cpu and memory are supported. [default=none] | ||||
| 	// See http://kubernetes.io/docs/user-guide/compute-resources for more detail. | ||||
| 	SystemReserved map[string]string `json:"systemReserved"` | ||||
| 	// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs | ||||
| 	// that describe resources reserved for kubernetes system components. | ||||
| 	// Currently only cpu and memory are supported. [default=none] | ||||
| 	// See http://kubernetes.io/docs/user-guide/compute-resources for more detail. | ||||
| 	KubeReserved map[string]string `json:"kubeReserved"` | ||||
|  | ||||
| 	// This flag helps kubelet identify absolute name of top level cgroup used to enforce `SystemReserved` compute resource reservation for OS system daemons. | ||||
| 	// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information. | ||||
| 	SystemReservedCgroup string `json:"systemReservedCgroup,omitempty"` | ||||
| 	// This flag helps kubelet identify absolute name of top level cgroup used to enforce `KubeReserved` compute resource reservation for Kubernetes node system daemons. | ||||
| 	// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information. | ||||
| 	KubeReservedCgroup string `json:"kubeReservedCgroup,omitempty"` | ||||
| 	// This flag specifies the various Node Allocatable enforcements that Kubelet needs to perform. | ||||
| 	// This flag accepts a list of options. Acceptable options are `pods`, `system-reserved` & `kube-reserved`. | ||||
| 	// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information. | ||||
| 	EnforceNodeAllocatable []string `json:"enforceNodeAllocatable,omitempty"` | ||||
| 	// This flag, if set, will avoid including `EvictionHard` limits while computing Node Allocatable. | ||||
| 	// Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information. | ||||
| 	ExperimentalNodeAllocatableIgnoreEvictionThreshold bool `json:"experimentalNodeAllocatableIgnoreEvictionThreshold,omitempty"` | ||||
| } | ||||
|  | ||||
| type KubeletAuthorizationMode string | ||||
|   | ||||
| @@ -396,8 +396,6 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu | ||||
| 	if err := v1.Convert_Pointer_bool_To_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	out.SystemReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.SystemReserved)) | ||||
| 	out.KubeReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.KubeReserved)) | ||||
| 	out.ProtectKernelDefaults = in.ProtectKernelDefaults | ||||
| 	if err := v1.Convert_Pointer_bool_To_bool(&in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains, s); err != nil { | ||||
| 		return err | ||||
| @@ -416,6 +414,12 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu | ||||
| 	out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn | ||||
| 	out.ExperimentalCheckNodeCapabilitiesBeforeMount = in.ExperimentalCheckNodeCapabilitiesBeforeMount | ||||
| 	out.KeepTerminatedPodVolumes = in.KeepTerminatedPodVolumes | ||||
| 	out.SystemReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.SystemReserved)) | ||||
| 	out.KubeReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.KubeReserved)) | ||||
| 	out.SystemReservedCgroup = in.SystemReservedCgroup | ||||
| 	out.KubeReservedCgroup = in.KubeReservedCgroup | ||||
| 	out.EnforceNodeAllocatable = *(*[]string)(unsafe.Pointer(&in.EnforceNodeAllocatable)) | ||||
| 	out.ExperimentalNodeAllocatableIgnoreEvictionThreshold = in.ExperimentalNodeAllocatableIgnoreEvictionThreshold | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| @@ -570,8 +574,6 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu | ||||
| 	if err := v1.Convert_bool_To_Pointer_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	out.SystemReserved = *(*map[string]string)(unsafe.Pointer(&in.SystemReserved)) | ||||
| 	out.KubeReserved = *(*map[string]string)(unsafe.Pointer(&in.KubeReserved)) | ||||
| 	out.ProtectKernelDefaults = in.ProtectKernelDefaults | ||||
| 	if err := v1.Convert_bool_To_Pointer_bool(&in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains, s); err != nil { | ||||
| 		return err | ||||
| @@ -590,6 +592,12 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu | ||||
| 	out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn | ||||
| 	out.ExperimentalCheckNodeCapabilitiesBeforeMount = in.ExperimentalCheckNodeCapabilitiesBeforeMount | ||||
| 	out.KeepTerminatedPodVolumes = in.KeepTerminatedPodVolumes | ||||
| 	out.SystemReserved = *(*map[string]string)(unsafe.Pointer(&in.SystemReserved)) | ||||
| 	out.KubeReserved = *(*map[string]string)(unsafe.Pointer(&in.KubeReserved)) | ||||
| 	out.SystemReservedCgroup = in.SystemReservedCgroup | ||||
| 	out.KubeReservedCgroup = in.KubeReservedCgroup | ||||
| 	out.EnforceNodeAllocatable = *(*[]string)(unsafe.Pointer(&in.EnforceNodeAllocatable)) | ||||
| 	out.ExperimentalNodeAllocatableIgnoreEvictionThreshold = in.ExperimentalNodeAllocatableIgnoreEvictionThreshold | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
|   | ||||
| @@ -266,20 +266,6 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c * | ||||
| 			*out = new(bool) | ||||
| 			**out = **in | ||||
| 		} | ||||
| 		if in.SystemReserved != nil { | ||||
| 			in, out := &in.SystemReserved, &out.SystemReserved | ||||
| 			*out = make(map[string]string) | ||||
| 			for key, val := range *in { | ||||
| 				(*out)[key] = val | ||||
| 			} | ||||
| 		} | ||||
| 		if in.KubeReserved != nil { | ||||
| 			in, out := &in.KubeReserved, &out.KubeReserved | ||||
| 			*out = make(map[string]string) | ||||
| 			for key, val := range *in { | ||||
| 				(*out)[key] = val | ||||
| 			} | ||||
| 		} | ||||
| 		if in.MakeIPTablesUtilChains != nil { | ||||
| 			in, out := &in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains | ||||
| 			*out = new(bool) | ||||
| @@ -305,6 +291,25 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c * | ||||
| 			*out = new(bool) | ||||
| 			**out = **in | ||||
| 		} | ||||
| 		if in.SystemReserved != nil { | ||||
| 			in, out := &in.SystemReserved, &out.SystemReserved | ||||
| 			*out = make(map[string]string) | ||||
| 			for key, val := range *in { | ||||
| 				(*out)[key] = val | ||||
| 			} | ||||
| 		} | ||||
| 		if in.KubeReserved != nil { | ||||
| 			in, out := &in.KubeReserved, &out.KubeReserved | ||||
| 			*out = make(map[string]string) | ||||
| 			for key, val := range *in { | ||||
| 				(*out)[key] = val | ||||
| 			} | ||||
| 		} | ||||
| 		if in.EnforceNodeAllocatable != nil { | ||||
| 			in, out := &in.EnforceNodeAllocatable, &out.EnforceNodeAllocatable | ||||
| 			*out = make([]string, len(*in)) | ||||
| 			copy(*out, *in) | ||||
| 		} | ||||
| 		return nil | ||||
| 	} | ||||
| } | ||||
|   | ||||
| @@ -177,6 +177,11 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface | ||||
| 				(*out)[key] = val | ||||
| 			} | ||||
| 		} | ||||
| 		if in.AllowedUnsafeSysctls != nil { | ||||
| 			in, out := &in.AllowedUnsafeSysctls, &out.AllowedUnsafeSysctls | ||||
| 			*out = make([]string, len(*in)) | ||||
| 			copy(*out, *in) | ||||
| 		} | ||||
| 		if in.SystemReserved != nil { | ||||
| 			in, out := &in.SystemReserved, &out.SystemReserved | ||||
| 			*out = make(ConfigurationMap) | ||||
| @@ -191,8 +196,8 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface | ||||
| 				(*out)[key] = val | ||||
| 			} | ||||
| 		} | ||||
| 		if in.AllowedUnsafeSysctls != nil { | ||||
| 			in, out := &in.AllowedUnsafeSysctls, &out.AllowedUnsafeSysctls | ||||
| 		if in.EnforceNodeAllocatable != nil { | ||||
| 			in, out := &in.EnforceNodeAllocatable, &out.EnforceNodeAllocatable | ||||
| 			*out = make([]string, len(*in)) | ||||
| 			copy(*out, *in) | ||||
| 		} | ||||
|   | ||||
| @@ -13283,34 +13283,6 @@ func GetOpenAPIDefinitions(ref openapi.ReferenceCallback) map[string]openapi.Ope | ||||
| 								Format:      "", | ||||
| 							}, | ||||
| 						}, | ||||
| 						"systemReserved": { | ||||
| 							SchemaProps: spec.SchemaProps{ | ||||
| 								Description: "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. [default=none] See http://kubernetes.io/docs/user-guide/compute-resources for more detail.", | ||||
| 								Type:        []string{"object"}, | ||||
| 								AdditionalProperties: &spec.SchemaOrBool{ | ||||
| 									Schema: &spec.Schema{ | ||||
| 										SchemaProps: spec.SchemaProps{ | ||||
| 											Type:   []string{"string"}, | ||||
| 											Format: "", | ||||
| 										}, | ||||
| 									}, | ||||
| 								}, | ||||
| 							}, | ||||
| 						}, | ||||
| 						"kubeReserved": { | ||||
| 							SchemaProps: spec.SchemaProps{ | ||||
| 								Description: "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. [default=none] See http://kubernetes.io/docs/user-guide/compute-resources for more detail.", | ||||
| 								Type:        []string{"object"}, | ||||
| 								AdditionalProperties: &spec.SchemaOrBool{ | ||||
| 									Schema: &spec.Schema{ | ||||
| 										SchemaProps: spec.SchemaProps{ | ||||
| 											Type:   []string{"string"}, | ||||
| 											Format: "", | ||||
| 										}, | ||||
| 									}, | ||||
| 								}, | ||||
| 							}, | ||||
| 						}, | ||||
| 						"protectKernelDefaults": { | ||||
| 							SchemaProps: spec.SchemaProps{ | ||||
| 								Description: "Default behaviour for kernel tuning", | ||||
| @@ -13388,8 +13360,71 @@ func GetOpenAPIDefinitions(ref openapi.ReferenceCallback) map[string]openapi.Ope | ||||
| 								Format:      "", | ||||
| 							}, | ||||
| 						}, | ||||
| 						"systemReserved": { | ||||
| 							SchemaProps: spec.SchemaProps{ | ||||
| 								Description: "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. [default=none] See http://kubernetes.io/docs/user-guide/compute-resources for more detail.", | ||||
| 								Type:        []string{"object"}, | ||||
| 								AdditionalProperties: &spec.SchemaOrBool{ | ||||
| 									Schema: &spec.Schema{ | ||||
| 										SchemaProps: spec.SchemaProps{ | ||||
| 											Type:   []string{"string"}, | ||||
| 											Format: "", | ||||
| 										}, | ||||
| 									}, | ||||
| 								}, | ||||
| 							}, | ||||
| 						}, | ||||
| 						"kubeReserved": { | ||||
| 							SchemaProps: spec.SchemaProps{ | ||||
| 								Description: "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. [default=none] See http://kubernetes.io/docs/user-guide/compute-resources for more detail.", | ||||
| 								Type:        []string{"object"}, | ||||
| 								AdditionalProperties: &spec.SchemaOrBool{ | ||||
| 									Schema: &spec.Schema{ | ||||
| 										SchemaProps: spec.SchemaProps{ | ||||
| 											Type:   []string{"string"}, | ||||
| 											Format: "", | ||||
| 										}, | ||||
| 									}, | ||||
| 								}, | ||||
| 							}, | ||||
| 						}, | ||||
| 						"systemReservedCgroup": { | ||||
| 							SchemaProps: spec.SchemaProps{ | ||||
| 								Description: "This flag helps kubelet identify absolute name of top level cgroup used to enforce `SystemReserved` compute resource reservation for OS system daemons. Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.", | ||||
| 								Type:        []string{"string"}, | ||||
| 								Format:      "", | ||||
| 							}, | ||||
| 						}, | ||||
| 						"kubeReservedCgroup": { | ||||
| 							SchemaProps: spec.SchemaProps{ | ||||
| 								Description: "This flag helps kubelet identify absolute name of top level cgroup used to enforce `KubeReserved` compute resource reservation for Kubernetes node system daemons. Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.", | ||||
| 								Type:        []string{"string"}, | ||||
| 								Format:      "", | ||||
| 							}, | ||||
| 						}, | ||||
| 						"enforceNodeAllocatable": { | ||||
| 							SchemaProps: spec.SchemaProps{ | ||||
| 								Description: "This flag specifies the various Node Allocatable enforcements that Kubelet needs to perform. This flag accepts a list of options. Acceptible options are `pods`, `system-reserved` & `kube-reserved`. Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.", | ||||
| 								Type:        []string{"array"}, | ||||
| 								Items: &spec.SchemaOrArray{ | ||||
| 									Schema: &spec.Schema{ | ||||
| 										SchemaProps: spec.SchemaProps{ | ||||
| 											Type:   []string{"string"}, | ||||
| 											Format: "", | ||||
| 										}, | ||||
| 									}, | ||||
| 								}, | ||||
| 							}, | ||||
| 						}, | ||||
| 						"experimentalNodeAllocatableIgnoreEvictionThreshold": { | ||||
| 							SchemaProps: spec.SchemaProps{ | ||||
| 								Description: "This flag, if set, will avoid including `EvictionHard` limits while computing Node Allocatable. Refer to [Node Allocatable](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) doc for more information.", | ||||
| 								Type:        []string{"boolean"}, | ||||
| 								Format:      "", | ||||
| 							}, | ||||
| 						}, | ||||
| 					}, | ||||
| 					Required: []string{"podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "experimentalKernelMemcgNotification", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", 
"protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit"}, | ||||
| 					Required: []string{"podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "experimentalKernelMemcgNotification", "podsPerCore", "enableControllerAttachDetach", "protectKernelDefaults", 
"makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit", "systemReserved", "kubeReserved"}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			Dependencies: []string{ | ||||
|   | ||||
| @@ -16,7 +16,10 @@ limitations under the License. | ||||
|  | ||||
| package cm | ||||
|  | ||||
| import "k8s.io/kubernetes/pkg/api/v1" | ||||
| import ( | ||||
| 	"k8s.io/apimachinery/pkg/util/sets" | ||||
| 	"k8s.io/kubernetes/pkg/api/v1" | ||||
| ) | ||||
|  | ||||
| // Manages the containers running on a machine. | ||||
| type ContainerManager interface { | ||||
| @@ -56,6 +59,16 @@ type NodeConfig struct { | ||||
| 	CgroupDriver          string | ||||
| 	ProtectKernelDefaults bool | ||||
| 	EnableCRI             bool | ||||
| 	NodeAllocatableConfig | ||||
| } | ||||
|  | ||||
| type NodeAllocatableConfig struct { | ||||
| 	KubeReservedCgroupName   string | ||||
| 	SystemReservedCgroupName string | ||||
| 	EnforceNodeAllocatable   sets.String | ||||
| 	KubeReserved             v1.ResourceList | ||||
| 	SystemReserved           v1.ResourceList | ||||
| 	HardEvictionThresholds   []evictionapi.Threshold | ||||
| } | ||||
|  | ||||
| type Status struct { | ||||
|   | ||||
							
								
								
									
										164
									
								
								pkg/kubelet/cm/node_allocatable.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										164
									
								
								pkg/kubelet/cm/node_allocatable.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,164 @@ | ||||
| // +build linux | ||||
|  | ||||
| /* | ||||
| Copyright 2017 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package cm | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
|  | ||||
| 	"github.com/golang/glog" | ||||
|  | ||||
| 	"k8s.io/kubernetes/pkg/api/v1" | ||||
| 	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" | ||||
| ) | ||||
|  | ||||
const (
	// defaultNodeAllocatableCgroupName is the name of the top level cgroup
	// that is created for, and limited to, Node Allocatable.
	defaultNodeAllocatableCgroupName = "/kubepods"

	// The keys below are looked up in NodeAllocatableConfig.EnforceNodeAllocatable
	// to decide which cgroups have limits applied.

	// nodeAllocatableEnforcementKey selects the Node Allocatable cgroup.
	nodeAllocatableEnforcementKey = "pods"
	// systemReservedEnforcementKey selects the system reserved cgroup.
	systemReservedEnforcementKey = "system-reserved"
	// kubeReservedEnforcementKey selects the kube reserved cgroup.
	kubeReservedEnforcementKey = "kube-reserved"
)
|  | ||||
| func createNodeAllocatableCgroups(nc NodeAllocatableConfig, nodeAllocatable v1.ResourceList, cgroupManager CgroupManager) error { | ||||
| 	cgroupConfig := &CgroupConfig{ | ||||
| 		Name: CgroupName(defaultNodeAllocatableCgroupName), | ||||
| 	} | ||||
| 	if err := cgroupManager.Create(cgroupConfig); err != nil { | ||||
| 		glog.Errorf("Failed to create %q cgroup and apply limits") | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // Enforce Node Allocatable Cgroup settings. | ||||
| func enforceNodeAllocatableCgroups(nc NodeAllocatableConfig, nodeAllocatable v1.ResourceList, cgroupManager CgroupManager) error { | ||||
| 	glog.V(4).Infof("Attempting to enforce Node Allocatable with config: %+v", nc) | ||||
| 	glog.V(4).Infof("Node Allocatable resources: %+v", nodeAllocatable) | ||||
| 	// Create top level cgroups for all pods if necessary. | ||||
| 	if nc.EnforceNodeAllocatable.Has(nodeAllocatableEnforcementKey) { | ||||
| 		cgroupConfig := &CgroupConfig{ | ||||
| 			Name:               CgroupName(defaultNodeAllocatableCgroupName), | ||||
| 			ResourceParameters: getCgroupConfig(nodeAllocatable), | ||||
| 		} | ||||
| 		glog.V(4).Infof("Updating Node Allocatable cgroup with %d cpu shares and %d bytes of memory", cgroupConfig.ResourceParameters.CpuShares, cgroupConfig.ResourceParameters.Memory) | ||||
| 		if err := cgroupManager.Update(cgroupConfig); err != nil { | ||||
| 			glog.Errorf("Failed to create %q cgroup and apply limits") | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	// Now apply kube reserved and system reserved limits if required. | ||||
| 	if nc.EnforceNodeAllocatable.Has(systemReservedEnforcementKey) { | ||||
| 		glog.V(2).Infof("Enforcing system reserved on cgroup %q with limits: %+v", nc.SystemReservedCgroupName, nc.SystemReserved) | ||||
| 		if err := enforceExistingCgroup(cgroupManager, nc.SystemReservedCgroupName, nc.SystemReserved); err != nil { | ||||
| 			return fmt.Errorf("failed to enforce System Reserved Cgroup Limits: %v", err) | ||||
| 		} | ||||
| 	} | ||||
| 	if nc.EnforceNodeAllocatable.Has(kubeReservedEnforcementKey) { | ||||
| 		glog.V(2).Infof("Enforcing kube reserved on cgroup %q with limits: %+v", nc.KubeReservedCgroupName, nc.KubeReserved) | ||||
| 		if err := enforceExistingCgroup(cgroupManager, nc.KubeReservedCgroupName, nc.KubeReserved); err != nil { | ||||
| 			return fmt.Errorf("failed to enforce Kube Reserved Cgroup Limits: %v", err) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func enforceExistingCgroup(cgroupManager CgroupManager, cName string, rl v1.ResourceList) error { | ||||
| 	cgroupConfig := &CgroupConfig{ | ||||
| 		Name:               CgroupName(cName), | ||||
| 		ResourceParameters: getCgroupConfig(rl), | ||||
| 	} | ||||
| 	glog.V(4).Infof("Enforcing limits on cgroup %q with %d cpu shares and %d bytes of memory", cName, cgroupConfig.ResourceParameters.CpuShares, cgroupConfig.ResourceParameters.Memory) | ||||
| 	if !cgroupManager.Exists(cgroupConfig.Name) { | ||||
| 		return fmt.Errorf("%q cgroup does not exist", cgroupConfig.Name) | ||||
| 	} | ||||
| 	if err := cgroupManager.Update(cgroupConfig); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func getCgroupConfig(rl v1.ResourceList) *ResourceConfig { | ||||
| 	// TODO(vishh): Set CPU Quota if necessary. | ||||
| 	if rl == nil { | ||||
| 		return nil | ||||
| 	} | ||||
| 	var rc ResourceConfig | ||||
| 	if q, exists := rl[v1.ResourceMemory]; exists { | ||||
| 		// Memory is defined in bytes. | ||||
| 		val := q.Value() | ||||
| 		rc.Memory = &val | ||||
| 	} | ||||
| 	if q, exists := rl[v1.ResourceCPU]; exists { | ||||
| 		// CPU is defined in milli-cores. | ||||
| 		val := MilliCPUToShares(q.MilliValue()) | ||||
| 		rc.CpuShares = &val | ||||
| 	} | ||||
| 	return &rc | ||||
| } | ||||
|  | ||||
| func (cm *containerManagerImpl) getNodeAllocatableInternal(includeHardEviction bool) v1.ResourceList { | ||||
| 	var evictionReservation v1.ResourceList | ||||
| 	if includeHardEviction { | ||||
| 		evictionReservation = hardEvictionReservation(cm.HardEvictionThresholds, cm.capacity) | ||||
| 	} | ||||
| 	result := make(v1.ResourceList) | ||||
| 	for k, v := range cm.capacity { | ||||
| 		value := *(v.Copy()) | ||||
| 		if cm.NodeConfig.SystemReserved != nil { | ||||
| 			value.Sub(cm.NodeConfig.SystemReserved[k]) | ||||
| 		} | ||||
| 		if cm.NodeConfig.KubeReserved != nil { | ||||
| 			value.Sub(cm.NodeConfig.KubeReserved[k]) | ||||
| 		} | ||||
| 		if evictionReservation != nil { | ||||
| 			value.Sub(evictionReservation[k]) | ||||
| 		} | ||||
| 		if value.Sign() < 0 { | ||||
| 			// Negative Allocatable resources don't make sense. | ||||
| 			value.Set(0) | ||||
| 		} | ||||
| 		result[k] = value | ||||
| 	} | ||||
| 	return result | ||||
|  | ||||
| } | ||||
|  | ||||
| // GetNodeAllocatable returns amount of compute resource available for pods. | ||||
| func (cm *containerManagerImpl) GetNodeAllocatable() v1.ResourceList { | ||||
| 	return cm.getNodeAllocatableInternal(!cm.NodeConfig.IgnoreHardEvictionThreshold) | ||||
| } | ||||
|  | ||||
| // hardEvictionReservation returns a resourcelist that includes reservation of resources based on hard eviction thresholds. | ||||
| func hardEvictionReservation(thresholds []evictionapi.Threshold, capacity v1.ResourceList) v1.ResourceList { | ||||
| 	if len(thresholds) == 0 { | ||||
| 		return nil | ||||
| 	} | ||||
| 	ret := v1.ResourceList{} | ||||
| 	for _, threshold := range thresholds { | ||||
| 		if threshold.Operator != evictionapi.OpLessThan { | ||||
| 			continue | ||||
| 		} | ||||
| 		switch threshold.Signal { | ||||
| 		case evictionapi.SignalMemoryAvailable: | ||||
| 			memoryCapacity := capacity[v1.ResourceMemory] | ||||
| 			value := evictionapi.GetThresholdQuantity(threshold.Value, &memoryCapacity) | ||||
| 			ret[v1.ResourceMemory] = *value | ||||
| 		} | ||||
| 	} | ||||
| 	return ret | ||||
| } | ||||
		Reference in New Issue
	
	Block a user