	Merge pull request #40655 from vishh/flag-gate-critical-pod-annotation
Automatic merge from submit-queue

Optionally avoid evicting critical pods in kubelet

For #40573

```release-note
When the "ExperimentalCriticalPodAnnotation" feature gate is set, the kubelet will avoid evicting pods in the "kube-system" namespace that carry the special annotation `scheduler.alpha.kubernetes.io/critical-pod`.
This feature should be used in conjunction with the rescheduler to guarantee availability for critical system pods: https://kubernetes.io/docs/admin/rescheduler/
```
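The first two hunks below wire the gate into the cluster config scripts by defaulting `FEATURE_GATES` to `ExperimentalCriticalPodAnnotation=true`. For orientation, here is a minimal standalone sketch of the `Gate=bool[,Gate=bool]` format that `--feature-gates` values use; it is a simplified stand-in, not the kubelet's actual flag parser:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseFeatureGates illustrates the "Gate1=true,Gate2=false" format used by
// FEATURE_GATES / --feature-gates. Simplified sketch, not the real parsing code.
func parseFeatureGates(spec string) (map[string]bool, error) {
	gates := map[string]bool{}
	for _, pair := range strings.Split(spec, ",") {
		if pair == "" {
			continue
		}
		kv := strings.SplitN(pair, "=", 2)
		if len(kv) != 2 {
			return nil, fmt.Errorf("missing '=' in %q", pair)
		}
		enabled, err := strconv.ParseBool(kv[1])
		if err != nil {
			return nil, fmt.Errorf("invalid bool in %q: %v", pair, err)
		}
		gates[strings.TrimSpace(kv[0])] = enabled
	}
	return gates, nil
}

func main() {
	gates, err := parseFeatureGates("ExperimentalCriticalPodAnnotation=true")
	if err != nil {
		panic(err)
	}
	fmt.Println(gates["ExperimentalCriticalPodAnnotation"]) // true
}
```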
@@ -123,7 +123,7 @@ fi
 RUNTIME_CONFIG="${KUBE_RUNTIME_CONFIG:-}"
 
 # Optional: set feature gates
-FEATURE_GATES="${KUBE_FEATURE_GATES:-}"
+FEATURE_GATES="${KUBE_FEATURE_GATES:-ExperimentalCriticalPodAnnotation=true}"
 
 # Optional: Install cluster DNS.
 ENABLE_CLUSTER_DNS="${KUBE_ENABLE_CLUSTER_DNS:-true}"
@@ -83,7 +83,7 @@ MASTER_IP_RANGE="${MASTER_IP_RANGE:-10.246.0.0/24}"
 RUNTIME_CONFIG="${KUBE_RUNTIME_CONFIG:-}"
 
 # Optional: set feature gates
-FEATURE_GATES="${KUBE_FEATURE_GATES:-}"
+FEATURE_GATES="${KUBE_FEATURE_GATES:-ExperimentalCriticalPodAnnotation=true}"
 
 TERMINATED_POD_GC_THRESHOLD=${TERMINATED_POD_GC_THRESHOLD:-100}
@@ -38,9 +38,8 @@ kind: Pod
 metadata:
   name: kube-proxy
   namespace: kube-system
-  # This annotation lowers the possibility that kube-proxy gets evicted when the
-  # node is under memory pressure, and prioritizes it for admission, even if
-  # the node is under memory pressure.
+  # This annotation ensures that kube-proxy does not get evicted if the node
+  # supports critical pod annotation based priority scheme.
   # Note that kube-proxy runs as a static pod so this annotation does NOT have
   # any effect on rescheduler (default scheduler and rescheduler are not
   # involved in scheduling kube-proxy).
@@ -58,6 +58,14 @@ const (
 	// contains a privileged container, or specific non-namespaced capabilities (MKNOD, SYS_MODULE,
 	// SYS_TIME). This should only be enabled if user namespace remapping is enabled in the docker daemon.
 	ExperimentalHostUserNamespaceDefaultingGate utilfeature.Feature = "ExperimentalHostUserNamespaceDefaulting"
+
+	// owner: @vishh
+	// alpha: v1.5
+	//
+	// Ensures guaranteed scheduling of pods marked with a special pod annotation `scheduler.alpha.kubernetes.io/critical-pod`
+	// and also prevents them from being evicted from a node.
+	// Note: This feature is not supported for `BestEffort` pods.
+	ExperimentalCriticalPodAnnotation utilfeature.Feature = "ExperimentalCriticalPodAnnotation"
 )
 
 func init() {
@@ -73,6 +81,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS
 	DynamicKubeletConfig:                        {Default: false, PreRelease: utilfeature.Alpha},
 	DynamicVolumeProvisioning:                   {Default: true, PreRelease: utilfeature.Alpha},
 	ExperimentalHostUserNamespaceDefaultingGate: {Default: false, PreRelease: utilfeature.Beta},
+	ExperimentalCriticalPodAnnotation:           {Default: false, PreRelease: utilfeature.Alpha},
 
 	// inherited features from generic apiserver, relisted here to get a conflict if it is changed
 	// unintentionally on either side:
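The gate is registered at alpha with `Default: false`, so nothing changes unless an operator opts in. A toy model of the default-plus-override behavior that `utilfeature.DefaultFeatureGate` provides (stand-in types, not the real package):

```go
package main

import "fmt"

// featureGates is a toy stand-in for a feature-gate registry: every gate has
// a compiled-in default, and Set overrides it at startup.
type featureGates struct {
	defaults  map[string]bool
	overrides map[string]bool
}

func (f *featureGates) Set(name string, enabled bool) { f.overrides[name] = enabled }

func (f *featureGates) Enabled(name string) bool {
	if v, ok := f.overrides[name]; ok {
		return v
	}
	return f.defaults[name] // alpha gates default to false
}

func main() {
	gates := &featureGates{
		defaults:  map[string]bool{"ExperimentalCriticalPodAnnotation": false},
		overrides: map[string]bool{},
	}
	fmt.Println(gates.Enabled("ExperimentalCriticalPodAnnotation")) // false (alpha default)
	gates.Set("ExperimentalCriticalPodAnnotation", true)
	fmt.Println(gates.Enabled("ExperimentalCriticalPodAnnotation")) // true
}
```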
@@ -9,37 +9,16 @@ load(
     "go_test",
 )
 
-go_library(
-    name = "go_default_library",
-    srcs = [
-        "doc.go",
-        "eviction_manager.go",
-        "helpers.go",
-        "types.go",
+cgo_genrule(
+    name = "cgo_codegen",
+    srcs = ["threshold_notifier_linux.go"],
+    clinkopts = [
+        "-lz",
+        "-lm",
+        "-lpthread",
+        "-ldl",
     ],
-    library = ":cgo_codegen",
     tags = ["automanaged"],
-    deps = [
-        "//pkg/api:go_default_library",
-        "//pkg/api/v1:go_default_library",
-        "//pkg/kubelet/api/v1alpha1/stats:go_default_library",
-        "//pkg/kubelet/cm:go_default_library",
-        "//pkg/kubelet/lifecycle:go_default_library",
-        "//pkg/kubelet/pod:go_default_library",
-        "//pkg/kubelet/qos:go_default_library",
-        "//pkg/kubelet/server/stats:go_default_library",
-        "//pkg/kubelet/types:go_default_library",
-        "//pkg/kubelet/util/format:go_default_library",
-        "//pkg/quota/evaluator/core:go_default_library",
-        "//vendor:github.com/golang/glog",
-        "//vendor:k8s.io/apimachinery/pkg/api/resource",
-        "//vendor:k8s.io/apimachinery/pkg/apis/meta/v1",
-        "//vendor:k8s.io/apimachinery/pkg/util/sets",
-        "//vendor:k8s.io/apimachinery/pkg/util/wait",
-        "//vendor:k8s.io/client-go/pkg/api/v1",
-        "//vendor:k8s.io/client-go/tools/record",
-        "//vendor:k8s.io/client-go/util/clock",
-    ],
 )
 
 go_test(
@@ -60,22 +39,45 @@ go_test(
         "//vendor:k8s.io/apimachinery/pkg/api/resource",
         "//vendor:k8s.io/apimachinery/pkg/apis/meta/v1",
         "//vendor:k8s.io/apimachinery/pkg/types",
+        "//vendor:k8s.io/apiserver/pkg/util/feature",
         "//vendor:k8s.io/client-go/pkg/api/v1",
         "//vendor:k8s.io/client-go/tools/record",
         "//vendor:k8s.io/client-go/util/clock",
     ],
 )
 
-cgo_genrule(
-    name = "cgo_codegen",
-    srcs = ["threshold_notifier_linux.go"],
-    clinkopts = [
-        "-lz",
-        "-lm",
-        "-lpthread",
-        "-ldl",
+go_library(
+    name = "go_default_library",
+    srcs = [
+        "doc.go",
+        "eviction_manager.go",
+        "helpers.go",
+        "types.go",
     ],
+    library = ":cgo_codegen",
     tags = ["automanaged"],
+    deps = [
+        "//pkg/api:go_default_library",
+        "//pkg/api/v1:go_default_library",
+        "//pkg/features:go_default_library",
+        "//pkg/kubelet/api/v1alpha1/stats:go_default_library",
+        "//pkg/kubelet/cm:go_default_library",
+        "//pkg/kubelet/lifecycle:go_default_library",
+        "//pkg/kubelet/qos:go_default_library",
+        "//pkg/kubelet/server/stats:go_default_library",
+        "//pkg/kubelet/types:go_default_library",
+        "//pkg/kubelet/util/format:go_default_library",
+        "//pkg/quota/evaluator/core:go_default_library",
+        "//vendor:github.com/golang/glog",
+        "//vendor:k8s.io/apimachinery/pkg/api/resource",
+        "//vendor:k8s.io/apimachinery/pkg/apis/meta/v1",
+        "//vendor:k8s.io/apimachinery/pkg/util/sets",
+        "//vendor:k8s.io/apimachinery/pkg/util/wait",
+        "//vendor:k8s.io/apiserver/pkg/util/feature",
+        "//vendor:k8s.io/client-go/pkg/api/v1",
+        "//vendor:k8s.io/client-go/tools/record",
+        "//vendor:k8s.io/client-go/util/clock",
+    ],
 )
 
 filegroup(
@@ -25,16 +25,17 @@ import (
 	"github.com/golang/glog"
 	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/apimachinery/pkg/util/wait"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	clientv1 "k8s.io/client-go/pkg/api/v1"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/client-go/util/clock"
 	"k8s.io/kubernetes/pkg/api/v1"
+	"k8s.io/kubernetes/pkg/features"
 	"k8s.io/kubernetes/pkg/kubelet/cm"
 	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
-	kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
 	"k8s.io/kubernetes/pkg/kubelet/qos"
 	"k8s.io/kubernetes/pkg/kubelet/server/stats"
-	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
+	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
 	"k8s.io/kubernetes/pkg/kubelet/util/format"
 )
 
@@ -111,7 +112,7 @@ func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd
 	// the node has memory pressure, admit if not best-effort
 	if hasNodeCondition(m.nodeConditions, v1.NodeMemoryPressure) {
 		notBestEffort := v1.PodQOSBestEffort != qos.GetPodQOS(attrs.Pod)
-		if notBestEffort || kubetypes.IsCriticalPod(attrs.Pod) {
+		if notBestEffort {
 			return lifecycle.PodAdmitResult{Admit: true}
 		}
 	}
@@ -313,13 +314,10 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 	// we kill at most a single pod during each eviction interval
 	for i := range activePods {
 		pod := activePods[i]
-		if kubepod.IsStaticPod(pod) {
-			// The eviction manager doesn't evict static pods. To stop a static
-			// pod, the admin needs to remove the manifest from kubelet's
-			// --config directory.
-			// TODO(39124): This is a short term fix, we can't assume static pods
-			// are always well behaved.
-			glog.Infof("eviction manager: NOT evicting static pod %v", pod.Name)
+		// If the pod is marked as critical and support for critical pod annotations is enabled,
+		// do not evict such pods. Once Kubelet supports preemptions, these pods can be safely evicted.
+		if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
+			kubelettypes.IsCriticalPod(pod) {
 			continue
 		}
 		status := v1.PodStatus{
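This is the core behavioral change: when the gate is enabled, the eviction loop skips critical pods entirely (and the old static-pod carve-out is dropped). A self-contained sketch of the skip logic with stand-in types:

```go
package main

import "fmt"

// pod and the helpers below are simplified stand-ins for the real kubelet types.
type pod struct {
	name      string
	namespace string
	critical  bool
}

func isCriticalPod(p pod) bool { return p.namespace == "kube-system" && p.critical }

// pickPodToEvict mirrors the synchronize loop: at most one pod is evicted per
// interval, and critical pods are skipped while the gate is on.
func pickPodToEvict(activePods []pod, gateEnabled bool) *pod {
	for i := range activePods {
		p := &activePods[i]
		if gateEnabled && isCriticalPod(*p) {
			continue // never evict critical pods while the gate is on
		}
		return p
	}
	return nil
}

func main() {
	pods := []pod{
		{name: "kube-proxy", namespace: "kube-system", critical: true},
		{name: "web", namespace: "default"},
	}
	if victim := pickPodToEvict(pods, true); victim != nil {
		fmt.Println("evict:", victim.name) // evict: web
	}
}
```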
@@ -22,13 +22,15 @@ import (
 
 	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/apimachinery/pkg/types"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	clientv1 "k8s.io/client-go/pkg/api/v1"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/client-go/util/clock"
+	kubeapi "k8s.io/kubernetes/pkg/api"
 	"k8s.io/kubernetes/pkg/api/v1"
 	statsapi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
 	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
-	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
+	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
 )
 
 // mockPodKiller is used to testing which pod is killed
@@ -212,8 +214,6 @@ func TestMemoryPressure(t *testing.T) {
 	// create a best effort pod to test admission
 	bestEffortPodToAdmit, _ := podMaker("best-admit", newResourceList("", ""), newResourceList("", ""), "0Gi")
 	burstablePodToAdmit, _ := podMaker("burst-admit", newResourceList("100m", "100Mi"), newResourceList("200m", "200Mi"), "0Gi")
-	criticalBestEffortPodToAdmit, _ := podMaker("critical-best-admit", newResourceList("", ""), newResourceList("", ""), "0Gi")
-	criticalBestEffortPodToAdmit.ObjectMeta.Annotations = map[string]string{kubetypes.CriticalPodAnnotationKey: ""}
 
 	// synchronize
 	manager.synchronize(diskInfoProvider, activePodsFunc)
@@ -224,8 +224,8 @@ func TestMemoryPressure(t *testing.T) {
 	}
 
 	// try to admit our pods (they should succeed)
-	expected := []bool{true, true, true}
-	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit, criticalBestEffortPodToAdmit} {
+	expected := []bool{true, true}
+	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
 		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
 			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
 		}
@@ -300,10 +300,9 @@ func TestMemoryPressure(t *testing.T) {
 		t.Errorf("Manager chose to kill pod with incorrect grace period.  Expected: %d, actual: %d", 0, observedGracePeriod)
 	}
 
-	// the best-effort pod without critical annotation should not admit,
-	// burstable and critical pods should
-	expected = []bool{false, true, true}
-	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit, criticalBestEffortPodToAdmit} {
+	// the best-effort pod should not admit, burstable should
+	expected = []bool{false, true}
+	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
 		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
 			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
 		}
@@ -325,9 +324,9 @@ func TestMemoryPressure(t *testing.T) {
 		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
 	}
 
-	// the best-effort pod should not admit, burstable and critical pods should
-	expected = []bool{false, true, true}
-	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit, criticalBestEffortPodToAdmit} {
+	// the best-effort pod should not admit, burstable should
+	expected = []bool{false, true}
+	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
 		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
 			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
 		}
@@ -350,8 +349,8 @@ func TestMemoryPressure(t *testing.T) {
 	}
 
 	// all pods should admit now
-	expected = []bool{true, true, true}
-	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit, criticalBestEffortPodToAdmit} {
+	expected = []bool{true, true}
+	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
 		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
 			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
 		}
@@ -1089,3 +1088,135 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
 		t.Errorf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
 	}
 }
+
+// TestCriticalPodsAreNotEvicted
+func TestCriticalPodsAreNotEvicted(t *testing.T) {
+	podMaker := makePodWithMemoryStats
+	summaryStatsMaker := makeMemoryStats
+	podsToMake := []podToMake{
+		{name: "critical", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "800Mi"},
+	}
+	pods := []*v1.Pod{}
+	podStats := map[*v1.Pod]statsapi.PodStats{}
+	for _, podToMake := range podsToMake {
+		pod, podStat := podMaker(podToMake.name, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
+		pods = append(pods, pod)
+		podStats[pod] = podStat
+	}
+
+	// Mark the pod as critical
+	pods[0].Annotations = map[string]string{
+		kubelettypes.CriticalPodAnnotationKey: "",
+	}
+	pods[0].Namespace = kubeapi.NamespaceSystem
+
+	podToEvict := pods[0]
+	activePodsFunc := func() []*v1.Pod {
+		return pods
+	}
+
+	fakeClock := clock.NewFakeClock(time.Now())
+	podKiller := &mockPodKiller{}
+	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
+	imageGC := &mockImageGC{freed: int64(0), err: nil}
+	nodeRef := &clientv1.ObjectReference{
+		Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: "",
+	}
+
+	config := Config{
+		MaxPodGracePeriodSeconds: 5,
+		PressureTransitionPeriod: time.Minute * 5,
+		Thresholds: []Threshold{
+			{
+				Signal:   SignalMemoryAvailable,
+				Operator: OpLessThan,
+				Value: ThresholdValue{
+					Quantity: quantityMustParse("1Gi"),
+				},
+			},
+			{
+				Signal:   SignalMemoryAvailable,
+				Operator: OpLessThan,
+				Value: ThresholdValue{
+					Quantity: quantityMustParse("2Gi"),
+				},
+				GracePeriod: time.Minute * 2,
+			},
+		},
+	}
+	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2Gi", podStats)}
+	manager := &managerImpl{
+		clock:           fakeClock,
+		killPodFunc:     podKiller.killPodNow,
+		imageGC:         imageGC,
+		config:          config,
+		recorder:        &record.FakeRecorder{},
+		summaryProvider: summaryProvider,
+		nodeRef:         nodeRef,
+		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
+		thresholdsFirstObservedAt:    thresholdsObservedAt{},
+	}
+
+	// Enable critical pod annotation feature gate
+	utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=True")
+	// induce soft threshold
+	fakeClock.Step(1 * time.Minute)
+	summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should have memory pressure
+	if !manager.IsUnderMemoryPressure() {
+		t.Errorf("Manager should report memory pressure since soft threshold was met")
+	}
+
+	// verify no pod was yet killed because there has not yet been enough time passed.
+	if podKiller.pod != nil {
+		t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
+	}
+
+	// step forward in time pass the grace period
+	fakeClock.Step(3 * time.Minute)
+	summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should have memory pressure
+	if !manager.IsUnderMemoryPressure() {
+		t.Errorf("Manager should report memory pressure since soft threshold was met")
+	}
+
+	// verify the right pod was killed with the right grace period.
+	if podKiller.pod == podToEvict {
+		t.Errorf("Manager chose to kill critical pod: %v, but should have ignored it", podKiller.pod.Name)
+	}
+	// reset state
+	podKiller.pod = nil
+	podKiller.gracePeriodOverride = nil
+
+	// remove memory pressure
+	fakeClock.Step(20 * time.Minute)
+	summaryProvider.result = summaryStatsMaker("3Gi", podStats)
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should not have memory pressure
+	if manager.IsUnderMemoryPressure() {
+		t.Errorf("Manager should not report memory pressure")
+	}
+
+	// Disable critical pod annotation feature gate
+	utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=False")
+
+	// induce memory pressure!
+	fakeClock.Step(1 * time.Minute)
+	summaryProvider.result = summaryStatsMaker("500Mi", podStats)
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should have memory pressure
+	if !manager.IsUnderMemoryPressure() {
+		t.Errorf("Manager should report memory pressure")
+	}
+
+	// check the right pod was killed
+	if podKiller.pod != podToEvict {
+		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
+	}
+}
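The new test drives a soft eviction threshold through its grace period with `clock.NewFakeClock`. A standalone sketch of that timing rule, with a plain timestamp standing in for the clock:

```go
package main

import (
	"fmt"
	"time"
)

// softThreshold models the rule the test exercises: a soft threshold only
// triggers eviction after it has been continuously met for its grace period.
type softThreshold struct {
	gracePeriod time.Duration
	firstMetAt  time.Time // zero until the threshold is first observed
}

func (s *softThreshold) observe(met bool, now time.Time) (evict bool) {
	if !met {
		s.firstMetAt = time.Time{} // threshold cleared; reset the timer
		return false
	}
	if s.firstMetAt.IsZero() {
		s.firstMetAt = now
	}
	return now.Sub(s.firstMetAt) >= s.gracePeriod
}

func main() {
	now := time.Now()
	th := &softThreshold{gracePeriod: 2 * time.Minute}
	fmt.Println(th.observe(true, now))                    // false: just met
	fmt.Println(th.observe(true, now.Add(1*time.Minute))) // false: within grace period
	fmt.Println(th.observe(true, now.Add(3*time.Minute))) // true: grace period elapsed
}
```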
@@ -1898,21 +1898,8 @@ func (kl *Kubelet) handleMirrorPod(mirrorPod *v1.Pod, start time.Time) {
 // a config source.
 func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) {
 	start := kl.clock.Now()
-
-	// Pass critical pods through admission check first.
-	var criticalPods []*v1.Pod
-	var nonCriticalPods []*v1.Pod
-	for _, p := range pods {
-		if kubetypes.IsCriticalPod(p) {
-			criticalPods = append(criticalPods, p)
-		} else {
-			nonCriticalPods = append(nonCriticalPods, p)
-		}
-	}
-	sort.Sort(sliceutils.PodsByCreationTime(criticalPods))
-	sort.Sort(sliceutils.PodsByCreationTime(nonCriticalPods))
-
-	for _, pod := range append(criticalPods, nonCriticalPods...) {
+	sort.Sort(sliceutils.PodsByCreationTime(pods))
+	for _, pod := range pods {
 		existingPods := kl.podManager.GetPods()
 		// Always add the pod to the pod manager. Kubelet relies on the pod
 		// manager as the source of truth for the desired state. If a pod does
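`HandlePodAdditions` drops the critical-first partitioning and returns to a single creation-time sort. A minimal equivalent of `sliceutils.PodsByCreationTime` over a stand-in pod type:

```go
package main

import (
	"fmt"
	"sort"
	"time"
)

// pod is a stand-in for v1.Pod; only the creation timestamp matters here.
type pod struct {
	name    string
	created time.Time
}

// sortByCreationTime orders pods oldest-first, the same ordering
// sliceutils.PodsByCreationTime provides.
func sortByCreationTime(pods []pod) {
	sort.Slice(pods, func(i, j int) bool {
		return pods[i].created.Before(pods[j].created)
	})
}

func main() {
	t0 := time.Now()
	pods := []pod{
		{"newpod", t0.Add(10 * time.Second)},
		{"oldpod", t0},
		{"middlepod", t0.Add(time.Second)},
	}
	sortByCreationTime(pods)
	for _, p := range pods {
		fmt.Println(p.name) // oldpod, middlepod, newpod
	}
}
```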
@@ -479,69 +479,6 @@ func TestHandlePortConflicts(t *testing.T) {
 	require.Equal(t, v1.PodPending, status.Phase)
 }
 
-// Tests that we sort pods based on criticality.
-func TestCriticalPrioritySorting(t *testing.T) {
-	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
-	kl := testKubelet.kubelet
-	nodes := []v1.Node{
-		{ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname},
-			Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: v1.ResourceList{
-				v1.ResourceCPU:    *resource.NewMilliQuantity(10, resource.DecimalSI),
-				v1.ResourceMemory: *resource.NewQuantity(100, resource.BinarySI),
-				v1.ResourcePods:   *resource.NewQuantity(40, resource.DecimalSI),
-			}}},
-	}
-	kl.nodeLister = testNodeLister{nodes: nodes}
-	kl.nodeInfo = testNodeInfo{nodes: nodes}
-	testKubelet.fakeCadvisor.On("MachineInfo").Return(&cadvisorapi.MachineInfo{}, nil)
-	testKubelet.fakeCadvisor.On("ImagesFsInfo").Return(cadvisorapiv2.FsInfo{}, nil)
-	testKubelet.fakeCadvisor.On("RootFsInfo").Return(cadvisorapiv2.FsInfo{}, nil)
-
-	spec := v1.PodSpec{NodeName: string(kl.nodeName),
-		Containers: []v1.Container{{Resources: v1.ResourceRequirements{
-			Requests: v1.ResourceList{
-				"memory": resource.MustParse("90"),
-			},
-		}}},
-	}
-	pods := []*v1.Pod{
-		podWithUidNameNsSpec("000000000", "newpod", "foo", spec),
-		podWithUidNameNsSpec("987654321", "oldpod", "foo", spec),
-		podWithUidNameNsSpec("123456789", "middlepod", "foo", spec),
-	}
-
-	// Pods are not sorted by creation time.
-	startTime := time.Now()
-	pods[0].CreationTimestamp = metav1.NewTime(startTime.Add(10 * time.Second))
-	pods[1].CreationTimestamp = metav1.NewTime(startTime)
-	pods[2].CreationTimestamp = metav1.NewTime(startTime.Add(1 * time.Second))
-
-	// Make the middle and new pod critical, the middle pod should win
-	// even though it comes later in the list
-	critical := map[string]string{kubetypes.CriticalPodAnnotationKey: ""}
-	pods[0].Annotations = critical
-	pods[1].Annotations = map[string]string{}
-	pods[2].Annotations = critical
-
-	// The non-critical pod should be rejected
-	notfittingPods := []*v1.Pod{pods[0], pods[1]}
-	fittingPod := pods[2]
-
-	kl.HandlePodAdditions(pods)
-	// Check pod status stored in the status map.
-	// notfittingPod should be Failed
-	for _, p := range notfittingPods {
-		status, found := kl.statusManager.GetPodStatus(p.UID)
-		require.True(t, found, "Status of pod %q is not found in the status map", p.UID)
-		require.Equal(t, v1.PodFailed, status.Phase)
-	}
-
-	// fittingPod should be Pending
-	status, found := kl.statusManager.GetPodStatus(fittingPod.UID)
-	require.True(t, found, "Status of pod %q is not found in the status map", fittingPod.UID)
-	require.Equal(t, v1.PodPending, status.Phase)
-}
-
 // Tests that we handle host name conflicts correctly by setting the failed status in status map.
 func TestHandleHostNameConflicts(t *testing.T) {
 	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
@@ -8,6 +8,21 @@ load(
     "go_test",
 )
 
+go_test(
+    name = "go_default_test",
+    srcs = [
+        "policy_test.go",
+        "qos_test.go",
+    ],
+    library = ":go_default_library",
+    tags = ["automanaged"],
+    deps = [
+        "//pkg/api/v1:go_default_library",
+        "//vendor:k8s.io/apimachinery/pkg/api/resource",
+        "//vendor:k8s.io/apimachinery/pkg/apis/meta/v1",
+    ],
+)
+
 go_library(
     name = "go_default_library",
     srcs = [
@@ -19,28 +34,11 @@ go_library(
     deps = [
         "//pkg/api:go_default_library",
         "//pkg/api/v1:go_default_library",
-        "//pkg/kubelet/types:go_default_library",
         "//vendor:k8s.io/apimachinery/pkg/api/resource",
         "//vendor:k8s.io/apimachinery/pkg/util/sets",
     ],
 )
 
-go_test(
-    name = "go_default_test",
-    srcs = [
-        "policy_test.go",
-        "qos_test.go",
-    ],
-    library = ":go_default_library",
-    tags = ["automanaged"],
-    deps = [
-        "//pkg/api/v1:go_default_library",
-        "//pkg/kubelet/types:go_default_library",
-        "//vendor:k8s.io/apimachinery/pkg/api/resource",
-        "//vendor:k8s.io/apimachinery/pkg/apis/meta/v1",
-    ],
-)
-
 filegroup(
     name = "package-srcs",
     srcs = glob(["**"]),
@@ -16,20 +16,14 @@ limitations under the License.
 
 package qos
 
-import (
-	"k8s.io/kubernetes/pkg/api/v1"
-	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
-)
+import "k8s.io/kubernetes/pkg/api/v1"
 
 const (
 	// PodInfraOOMAdj is very docker specific. For arbitrary runtime, it may not make
 	// sense to set sandbox level oom score, e.g. a sandbox could only be a namespace
 	// without a process.
 	// TODO: Handle infra container oom score adj in a runtime agnostic way.
-	// TODO: Should handle critical pod oom score adj with a proper preemption priority.
-	// This is the workaround for https://github.com/kubernetes/kubernetes/issues/38322.
 	PodInfraOOMAdj        int = -998
-	CriticalPodOOMAdj     int = -998
 	KubeletOOMScoreAdj    int = -999
 	DockerOOMScoreAdj     int = -999
 	KubeProxyOOMScoreAdj  int = -999
@@ -44,10 +38,6 @@ const (
 // and 1000. Containers with higher OOM scores are killed if the system runs out of memory.
 // See https://lwn.net/Articles/391222/ for more information.
 func GetContainerOOMScoreAdjust(pod *v1.Pod, container *v1.Container, memoryCapacity int64) int {
-	if kubetypes.IsCriticalPod(pod) {
-		return CriticalPodOOMAdj
-	}
-
 	switch GetPodQOS(pod) {
 	case v1.PodQOSGuaranteed:
 		// Guaranteed containers should be the last to get killed.
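With `CriticalPodOOMAdj` removed, the OOM score adjustment depends only on the pod's QoS class. A standalone sketch of the scoring scheme; the burstable formula below follows the upstream policy as best I recall it, so treat the exact constants as an approximation:

```go
package main

import "fmt"

// Sketch of GetContainerOOMScoreAdjust after the change: the critical-pod
// special case is gone, so the score depends only on QoS.
const (
	guaranteedOOMScoreAdj = -998
	bestEffortOOMScoreAdj = 1000
)

func oomScoreAdjust(qos string, memoryRequest, memoryCapacity int64) int {
	switch qos {
	case "Guaranteed":
		return guaranteedOOMScoreAdj // last to be OOM-killed
	case "BestEffort":
		return bestEffortOOMScoreAdj // first to be OOM-killed
	}
	// Burstable: scale inversely with the fraction of node memory requested,
	// clamped so it never collides with the guaranteed/best-effort extremes.
	adj := 1000 - (1000*memoryRequest)/memoryCapacity
	if adj < 2 {
		adj = 2
	}
	if adj > 999 {
		adj = 999
	}
	return int(adj)
}

func main() {
	fmt.Println(oomScoreAdjust("Guaranteed", 0, 100)) // -998
	fmt.Println(oomScoreAdjust("Burstable", 90, 100)) // 100
	fmt.Println(oomScoreAdjust("BestEffort", 0, 100)) // 1000
}
```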
@@ -21,9 +21,7 @@ import (
 	"testing"
 
 	"k8s.io/apimachinery/pkg/api/resource"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/kubernetes/pkg/api/v1"
-	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
 )
 
 const (
@@ -137,25 +135,6 @@ var (
 			},
 		},
 	}
-	criticalPodWithNoLimit = v1.Pod{
-		ObjectMeta: metav1.ObjectMeta{
-			Annotations: map[string]string{
-				kubetypes.CriticalPodAnnotationKey: "",
-			},
-		},
-		Spec: v1.PodSpec{
-			Containers: []v1.Container{
-				{
-					Resources: v1.ResourceRequirements{
-						Requests: v1.ResourceList{
-							v1.ResourceName(v1.ResourceMemory): resource.MustParse(strconv.Itoa(standardMemoryAmount - 1)),
-							v1.ResourceName(v1.ResourceCPU):    resource.MustParse("5m"),
-						},
-					},
-				},
-			},
-		},
-	}
 )
 
 type oomTest struct {
@@ -209,12 +188,6 @@ func TestGetContainerOOMScoreAdjust(t *testing.T) {
 			lowOOMScoreAdj:  2,
 			highOOMScoreAdj: 2,
 		},
-		{
-			pod:             &criticalPodWithNoLimit,
-			memoryCapacity:  standardMemoryAmount,
-			lowOOMScoreAdj:  -998,
-			highOOMScoreAdj: -998,
-		},
 	}
 	for _, test := range oomTests {
 		oomScoreAdj := GetContainerOOMScoreAdjust(test.pod, &test.pod.Spec.Containers[0], test.memoryCapacity)
@@ -19,6 +19,7 @@ go_library(
     ],
     tags = ["automanaged"],
     deps = [
+        "//pkg/api:go_default_library",
        "//pkg/api/v1:go_default_library",
         "//vendor:k8s.io/apimachinery/pkg/apis/meta/v1",
     ],
@@ -20,22 +20,17 @@ import (
 	"fmt"
 
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	kubeapi "k8s.io/kubernetes/pkg/api"
 	"k8s.io/kubernetes/pkg/api/v1"
 )
 
-const ConfigSourceAnnotationKey = "kubernetes.io/config.source"
-const ConfigMirrorAnnotationKey = "kubernetes.io/config.mirror"
-const ConfigFirstSeenAnnotationKey = "kubernetes.io/config.seen"
-const ConfigHashAnnotationKey = "kubernetes.io/config.hash"
-
-// This key needs to sync with the key used by the rescheduler, which currently
-// lives in contrib. Its presence indicates 2 things, as far as the kubelet is
-// concerned:
-// 1. Resource related admission checks will prioritize the admission of
-//    pods bearing the key, over pods without the key, regardless of QoS.
-// 2. The OOM score of pods bearing the key will be <= pods without
-//    the key (where the <= part is determied by QoS).
-const CriticalPodAnnotationKey = "scheduler.alpha.kubernetes.io/critical-pod"
+const (
+	ConfigSourceAnnotationKey    = "kubernetes.io/config.source"
+	ConfigMirrorAnnotationKey    = "kubernetes.io/config.mirror"
+	ConfigFirstSeenAnnotationKey = "kubernetes.io/config.seen"
+	ConfigHashAnnotationKey      = "kubernetes.io/config.hash"
+	CriticalPodAnnotationKey     = "scheduler.alpha.kubernetes.io/critical-pod"
+)
 
 // PodOperation defines what changes will be made on a pod configuration.
 type PodOperation int
@@ -146,6 +141,13 @@ func (sp SyncPodType) String() string {
 // key. Both the rescheduler and the kubelet use this key to make admission
 // and scheduling decisions.
 func IsCriticalPod(pod *v1.Pod) bool {
-	_, ok := pod.Annotations[CriticalPodAnnotationKey]
-	return ok
+	// Critical pods are restricted to "kube-system" namespace as of now.
+	if pod.Namespace != kubeapi.NamespaceSystem {
+		return false
+	}
+	val, ok := pod.Annotations[CriticalPodAnnotationKey]
+	if ok && val == "" {
+		return true
+	}
+	return false
 }
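The tightened `IsCriticalPod` is simple enough to exercise in isolation. A standalone sketch mirroring the new logic, with a stand-in pod type:

```go
package main

import "fmt"

// pod is a stand-in for the real v1.Pod; only namespace and annotations matter.
const criticalPodAnnotationKey = "scheduler.alpha.kubernetes.io/critical-pod"

type pod struct {
	namespace   string
	annotations map[string]string
}

func isCriticalPod(p pod) bool {
	// Critical pods are restricted to the "kube-system" namespace for now.
	if p.namespace != "kube-system" {
		return false
	}
	// The annotation must be present with an empty value.
	val, ok := p.annotations[criticalPodAnnotationKey]
	return ok && val == ""
}

func main() {
	annotated := map[string]string{criticalPodAnnotationKey: ""}
	fmt.Println(isCriticalPod(pod{"kube-system", annotated})) // true
	fmt.Println(isCriticalPod(pod{"default", annotated}))     // false: wrong namespace
	fmt.Println(isCriticalPod(pod{"kube-system", nil}))       // false: no annotation
}
```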