	In-place Pod Vertical Scaling - core implementation
1. Core Kubelet changes to implement In-place Pod Vertical Scaling.
2. E2E tests for In-place Pod Vertical Scaling.
3. Refactor kubelet code and add missing tests (Derek's kubelet review).
4. Add a new hash over container fields without Resources field to allow feature gate toggling without restarting containers not using the feature.
5. Fix corner-case where resize A->B->A gets ignored.
6. Add cgroup v2 support to pod resize E2E test.

KEP: /enhancements/keps/sig-node/1287-in-place-update-pod-resources

Co-authored-by: Chen Wang <Chen.Wang1@ibm.com>
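For orientation, here is a minimal, hypothetical sketch of the container spec fields the changes below operate on: resources plus the per-resource resize policy. It is not part of the commit; the field and constant names (ResizePolicy, RestartNotRequired, RestartRequired) follow the alpha API as it appears in this diff and may be renamed in later releases.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// A container that can be resized in place: CPU changes do not require a
	// restart, memory changes do.
	ctr := v1.Container{
		Name:  "app",
		Image: "app-image:latest", // placeholder image
		Resources: v1.ResourceRequirements{
			Requests: v1.ResourceList{
				v1.ResourceCPU:    resource.MustParse("100m"),
				v1.ResourceMemory: resource.MustParse("128Mi"),
			},
			Limits: v1.ResourceList{
				v1.ResourceCPU:    resource.MustParse("200m"),
				v1.ResourceMemory: resource.MustParse("256Mi"),
			},
		},
		ResizePolicy: []v1.ContainerResizePolicy{
			{ResourceName: v1.ResourceCPU, Policy: v1.RestartNotRequired},
			{ResourceName: v1.ResourceMemory, Policy: v1.RestartRequired},
		},
	}
	fmt.Printf("resize policy: %+v\n", ctr.ResizePolicy)
}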
Authored by: Vinay Kulkarni
Committed by: vinay kulkarni
Parent: 231849a908
Commit: f2bd94a0de
			| @@ -22,6 +22,7 @@ import ( | ||||
| 	"os" | ||||
| 	"path" | ||||
| 	"path/filepath" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 	"sync" | ||||
| 	"time" | ||||
| @@ -557,3 +558,86 @@ func (m *cgroupManagerImpl) MemoryUsage(name CgroupName) (int64, error) { | ||||
| 	val, err := fscommon.GetCgroupParamUint(path, file) | ||||
| 	return int64(val), err | ||||
| } | ||||
|  | ||||
| // Get the memory limit in bytes applied to the cgroup | ||||
| func (m *cgroupManagerImpl) GetCgroupMemoryConfig(name CgroupName) (uint64, error) { | ||||
| 	cgroupPaths := m.buildCgroupPaths(name) | ||||
| 	cgroupMemoryPath, found := cgroupPaths["memory"] | ||||
| 	if !found { | ||||
| 		return 0, fmt.Errorf("failed to build memory cgroup fs path for cgroup %v", name) | ||||
| 	} | ||||
| 	memLimit, err := fscommon.GetCgroupParamUint(cgroupMemoryPath, "memory.limit_in_bytes") | ||||
| 	if err != nil { | ||||
| 		return 0, fmt.Errorf("failed to get memory.limit_in_bytes for cgroup %v: %v", name, err) | ||||
| 	} | ||||
| 	return memLimit, nil | ||||
| } | ||||
|  | ||||
| // Get the cpu quota, cpu period, and cpu shares applied to the cgroup | ||||
| func (m *cgroupManagerImpl) GetCgroupCpuConfig(name CgroupName) (int64, uint64, uint64, error) { | ||||
| 	cgroupPaths := m.buildCgroupPaths(name) | ||||
| 	cgroupCpuPath, found := cgroupPaths["cpu"] | ||||
| 	if !found { | ||||
| 		return 0, 0, 0, fmt.Errorf("failed to build CPU cgroup fs path for cgroup %v", name) | ||||
| 	} | ||||
| 	cpuQuotaStr, errQ := fscommon.GetCgroupParamString(cgroupCpuPath, "cpu.cfs_quota_us") | ||||
| 	if errQ != nil { | ||||
| 		return 0, 0, 0, fmt.Errorf("failed to read CPU quota for cgroup %v: %v", name, errQ) | ||||
| 	} | ||||
| 	cpuQuota, errInt := strconv.ParseInt(cpuQuotaStr, 10, 64) | ||||
| 	if errInt != nil { | ||||
| 		return 0, 0, 0, fmt.Errorf("failed to convert CPU quota as integer for cgroup %v: %v", name, errInt) | ||||
| 	} | ||||
| 	cpuPeriod, errP := fscommon.GetCgroupParamUint(cgroupCpuPath, "cpu.cfs_period_us") | ||||
| 	if errP != nil { | ||||
| 		return 0, 0, 0, fmt.Errorf("failed to read CPU period for cgroup %v: %v", name, errP) | ||||
| 	} | ||||
| 	cpuShares, errS := fscommon.GetCgroupParamUint(cgroupCpuPath, "cpu.shares") | ||||
| 	if errS != nil { | ||||
| 		return 0, 0, 0, fmt.Errorf("failed to read CPU shares for cgroup %v: %v", name, errS) | ||||
| 	} | ||||
| 	return cpuQuota, cpuPeriod, cpuShares, nil | ||||
| } | ||||
|  | ||||
| // Set the memory limit in bytes applied to the cgroup | ||||
| func (m *cgroupManagerImpl) SetCgroupMemoryConfig(name CgroupName, memoryLimit int64) error { | ||||
| 	cgroupPaths := m.buildCgroupPaths(name) | ||||
| 	cgroupMemoryPath, found := cgroupPaths["memory"] | ||||
| 	if !found { | ||||
| 		return fmt.Errorf("failed to build memory cgroup fs path for cgroup %v", name) | ||||
| 	} | ||||
| 	memLimit := strconv.FormatInt(memoryLimit, 10) | ||||
| 	if err := os.WriteFile(filepath.Join(cgroupMemoryPath, "memory.limit_in_bytes"), []byte(memLimit), 0700); err != nil { | ||||
| 		return fmt.Errorf("failed to write %v to %v: %v", memLimit, cgroupMemoryPath, err) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // Set the cpu quota, cpu period, and cpu shares applied to the cgroup | ||||
| func (m *cgroupManagerImpl) SetCgroupCpuConfig(name CgroupName, cpuQuota *int64, cpuPeriod, cpuShares *uint64) error { | ||||
| 	var cpuQuotaStr, cpuPeriodStr, cpuSharesStr string | ||||
| 	cgroupPaths := m.buildCgroupPaths(name) | ||||
| 	cgroupCpuPath, found := cgroupPaths["cpu"] | ||||
| 	if !found { | ||||
| 		return fmt.Errorf("failed to build cpu cgroup fs path for cgroup %v", name) | ||||
| 	} | ||||
| 	if cpuQuota != nil { | ||||
| 		cpuQuotaStr = strconv.FormatInt(*cpuQuota, 10) | ||||
| 		if err := os.WriteFile(filepath.Join(cgroupCpuPath, "cpu.cfs_quota_us"), []byte(cpuQuotaStr), 0700); err != nil { | ||||
| 			return fmt.Errorf("failed to write %v to %v: %v", cpuQuotaStr, cgroupCpuPath, err) | ||||
| 		} | ||||
| 	} | ||||
| 	if cpuPeriod != nil { | ||||
| 		cpuPeriodStr = strconv.FormatUint(*cpuPeriod, 10) | ||||
| 		if err := os.WriteFile(filepath.Join(cgroupCpuPath, "cpu.cfs_period_us"), []byte(cpuPeriodStr), 0700); err != nil { | ||||
| 			return fmt.Errorf("failed to write %v to %v: %v", cpuPeriodStr, cgroupCpuPath, err) | ||||
| 		} | ||||
| 	} | ||||
| 	if cpuShares != nil { | ||||
| 		cpuSharesStr = strconv.FormatUint(*cpuShares, 10) | ||||
| 		if err := os.WriteFile(filepath.Join(cgroupCpuPath, "cpu.shares"), []byte(cpuSharesStr), 0700); err != nil { | ||||
| 			return fmt.Errorf("failed to write %v to %v: %v", cpuSharesStr, cgroupCpuPath, err) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|   | ||||
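A usage note on the SetCgroupCpuConfig method added above: its pointer parameters let a caller update one CPU setting and leave the others untouched, since nil values are simply skipped. The standalone sketch below reproduces that skip-if-nil pattern against a temporary directory standing in for a cgroupfs path such as /sys/fs/cgroup/cpu/kubepods/pod<uid>; it is illustrative only and does not call into the kubelet package.

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strconv"
)

// setCgroupCPUConfig mirrors the skip-if-nil behaviour of the kubelet's
// SetCgroupCpuConfig: each parameter is written only when it is non-nil.
func setCgroupCPUConfig(cgroupCPUPath string, quota *int64, period, shares *uint64) error {
	write := func(file, value string) error {
		return os.WriteFile(filepath.Join(cgroupCPUPath, file), []byte(value), 0700)
	}
	if quota != nil {
		if err := write("cpu.cfs_quota_us", strconv.FormatInt(*quota, 10)); err != nil {
			return err
		}
	}
	if period != nil {
		if err := write("cpu.cfs_period_us", strconv.FormatUint(*period, 10)); err != nil {
			return err
		}
	}
	if shares != nil {
		if err := write("cpu.shares", strconv.FormatUint(*shares, 10)); err != nil {
			return err
		}
	}
	return nil
}

func main() {
	// Stand-in directory for a real cgroupfs path.
	dir, err := os.MkdirTemp("", "cpu-cgroup-demo")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	quota := int64(150000) // 1.5 CPUs with the default 100000us period
	// Only the quota is written; period and shares stay untouched.
	if err := setCgroupCPUConfig(dir, &quota, nil, nil); err != nil {
		panic(err)
	}
	out, _ := os.ReadFile(filepath.Join(dir, "cpu.cfs_quota_us"))
	fmt.Println(string(out)) // 150000
}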
| @@ -77,6 +77,22 @@ func (m *unsupportedCgroupManager) ReduceCPULimits(cgroupName CgroupName) error | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (m *unsupportedCgroupManager) GetCgroupMemoryConfig(name CgroupName) (uint64, error) { | ||||
| 	return 0, errNotSupported | ||||
| } | ||||
|  | ||||
| func (m *unsupportedCgroupManager) GetCgroupCpuConfig(name CgroupName) (int64, uint64, uint64, error) { | ||||
| 	return 0, 0, 0, errNotSupported | ||||
| } | ||||
|  | ||||
| func (m *unsupportedCgroupManager) SetCgroupMemoryConfig(name CgroupName, memoryLimit int64) error { | ||||
| 	return errNotSupported | ||||
| } | ||||
|  | ||||
| func (m *unsupportedCgroupManager) SetCgroupCpuConfig(name CgroupName, cpuQuota *int64, cpuPeriod, cpuShares *uint64) error { | ||||
| 	return errNotSupported | ||||
| } | ||||
|  | ||||
| var RootCgroupName = CgroupName([]string{}) | ||||
|  | ||||
| func NewCgroupName(base CgroupName, components ...string) CgroupName { | ||||
|   | ||||
| @@ -95,6 +95,22 @@ func (cm *containerManagerStub) GetDevicePluginResourceCapacity() (v1.ResourceLi | ||||
| 	return cm.extendedPluginResources, cm.extendedPluginResources, []string{} | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerStub) GetPodCgroupMemoryConfig(_ *v1.Pod) (uint64, error) { | ||||
| 	return 0, nil | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerStub) GetPodCgroupCpuConfig(_ *v1.Pod) (int64, uint64, uint64, error) { | ||||
| 	return 0, 0, 0, nil | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerStub) SetPodCgroupMemoryConfig(_ *v1.Pod, _ int64) error { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerStub) SetPodCgroupCpuConfig(_ *v1.Pod, _ *int64, _, _ *uint64) error { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (cm *containerManagerStub) NewPodContainerManager() PodContainerManager { | ||||
| 	return &podContainerManagerStub{} | ||||
| } | ||||
|   | ||||
| @@ -20,9 +20,11 @@ import ( | ||||
| 	"fmt" | ||||
|  | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	utilfeature "k8s.io/apiserver/pkg/util/feature" | ||||
| 	"k8s.io/klog/v2" | ||||
|  | ||||
| 	podutil "k8s.io/kubernetes/pkg/api/v1/pod" | ||||
| 	v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" | ||||
| 	"k8s.io/kubernetes/pkg/features" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/cm/cpuset" | ||||
| @@ -380,6 +382,11 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int | ||||
| 		return 0 | ||||
| 	} | ||||
| 	cpuQuantity := container.Resources.Requests[v1.ResourceCPU] | ||||
| 	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { | ||||
| 		if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok { | ||||
| 			cpuQuantity = cs.ResourcesAllocated[v1.ResourceCPU] | ||||
| 		} | ||||
| 	} | ||||
| 	if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() { | ||||
| 		return 0 | ||||
| 	} | ||||
|   | ||||
| @@ -104,3 +104,38 @@ func (m *FakePodContainerManager) IsPodCgroup(cgroupfs string) (bool, types.UID) | ||||
| 	m.CalledFunctions = append(m.CalledFunctions, "IsPodCgroup") | ||||
| 	return false, types.UID("") | ||||
| } | ||||
|  | ||||
| func (cm *FakePodContainerManager) GetPodCgroupMemoryUsage(_ *v1.Pod) (uint64, error) { | ||||
| 	cm.Lock() | ||||
| 	defer cm.Unlock() | ||||
| 	cm.CalledFunctions = append(cm.CalledFunctions, "GetPodCgroupMemoryUsage") | ||||
| 	return 0, nil | ||||
| } | ||||
|  | ||||
| func (cm *FakePodContainerManager) GetPodCgroupMemoryConfig(_ *v1.Pod) (uint64, error) { | ||||
| 	cm.Lock() | ||||
| 	defer cm.Unlock() | ||||
| 	cm.CalledFunctions = append(cm.CalledFunctions, "GetPodCgroupMemoryConfig") | ||||
| 	return 0, nil | ||||
| } | ||||
|  | ||||
| func (cm *FakePodContainerManager) GetPodCgroupCpuConfig(_ *v1.Pod) (int64, uint64, uint64, error) { | ||||
| 	cm.Lock() | ||||
| 	defer cm.Unlock() | ||||
| 	cm.CalledFunctions = append(cm.CalledFunctions, "GetPodCgroupCpuConfig") | ||||
| 	return 0, 0, 0, nil | ||||
| } | ||||
|  | ||||
| func (cm *FakePodContainerManager) SetPodCgroupMemoryConfig(_ *v1.Pod, _ int64) error { | ||||
| 	cm.Lock() | ||||
| 	defer cm.Unlock() | ||||
| 	cm.CalledFunctions = append(cm.CalledFunctions, "SetPodCgroupMemoryConfig") | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (cm *FakePodContainerManager) SetPodCgroupCpuConfig(_ *v1.Pod, _ *int64, _, _ *uint64) error { | ||||
| 	cm.Lock() | ||||
| 	defer cm.Unlock() | ||||
| 	cm.CalledFunctions = append(cm.CalledFunctions, "SetPodCgroupCpuConfig") | ||||
| 	return nil | ||||
| } | ||||
|   | ||||
| @@ -28,6 +28,7 @@ import ( | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	"k8s.io/apimachinery/pkg/types" | ||||
| 	utilfeature "k8s.io/apiserver/pkg/util/feature" | ||||
| 	podutil "k8s.io/kubernetes/pkg/api/v1/pod" | ||||
| 	"k8s.io/kubernetes/pkg/api/v1/resource" | ||||
| 	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" | ||||
| 	v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" | ||||
| @@ -151,6 +152,11 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64, | ||||
| 			memoryLimitsDeclared = false | ||||
| 		} | ||||
| 		containerHugePageLimits := HugePageLimits(container.Resources.Requests) | ||||
| 		if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.InPlacePodVerticalScaling) { | ||||
| 			if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok { | ||||
| 				containerHugePageLimits = HugePageLimits(cs.ResourcesAllocated) | ||||
| 			} | ||||
| 		} | ||||
| 		for k, v := range containerHugePageLimits { | ||||
| 			if value, exists := hugePageLimits[k]; exists { | ||||
| 				hugePageLimits[k] = value + v | ||||
|   | ||||
| @@ -25,10 +25,13 @@ import ( | ||||
| ) | ||||
|  | ||||
| const ( | ||||
| 	MinShares     = 0 | ||||
| 	MinShares = 0 | ||||
| 	MaxShares = 0 | ||||
|  | ||||
| 	SharesPerCPU  = 0 | ||||
| 	MilliCPUToCPU = 0 | ||||
|  | ||||
| 	QuotaPeriod    = 0 | ||||
| 	MinQuotaPeriod = 0 | ||||
| ) | ||||
|  | ||||
|   | ||||
| @@ -25,9 +25,12 @@ import ( | ||||
|  | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	"k8s.io/apimachinery/pkg/api/resource" | ||||
| 	utilfeature "k8s.io/apiserver/pkg/util/feature" | ||||
| 	"k8s.io/klog/v2" | ||||
| 	podutil "k8s.io/kubernetes/pkg/api/v1/pod" | ||||
| 	corehelper "k8s.io/kubernetes/pkg/apis/core/v1/helper" | ||||
| 	v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" | ||||
| 	"k8s.io/kubernetes/pkg/features" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" | ||||
| @@ -107,7 +110,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai | ||||
| 	hint := p.affinity.GetAffinity(podUID, container.Name) | ||||
| 	klog.InfoS("Got topology affinity", "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "hint", hint) | ||||
|  | ||||
| 	requestedResources, err := getRequestedResources(container) | ||||
| 	requestedResources, err := getRequestedResources(pod, container) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| @@ -319,7 +322,7 @@ func getPodRequestedResources(pod *v1.Pod) (map[v1.ResourceName]uint64, error) { | ||||
| 	reqRsrcsByAppCtrs := make(map[v1.ResourceName]uint64) | ||||
|  | ||||
| 	for _, ctr := range pod.Spec.InitContainers { | ||||
| 		reqRsrcs, err := getRequestedResources(&ctr) | ||||
| 		reqRsrcs, err := getRequestedResources(pod, &ctr) | ||||
|  | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| @@ -336,7 +339,7 @@ func getPodRequestedResources(pod *v1.Pod) (map[v1.ResourceName]uint64, error) { | ||||
| 	} | ||||
|  | ||||
| 	for _, ctr := range pod.Spec.Containers { | ||||
| 		reqRsrcs, err := getRequestedResources(&ctr) | ||||
| 		reqRsrcs, err := getRequestedResources(pod, &ctr) | ||||
|  | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| @@ -391,7 +394,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	requestedResources, err := getRequestedResources(container) | ||||
| 	requestedResources, err := getRequestedResources(pod, container) | ||||
| 	if err != nil { | ||||
| 		klog.ErrorS(err, "Failed to get container requested resources", "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name) | ||||
| 		return nil | ||||
| @@ -408,9 +411,15 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v | ||||
| 	return p.calculateHints(s.GetMachineState(), pod, requestedResources) | ||||
| } | ||||
|  | ||||
| func getRequestedResources(container *v1.Container) (map[v1.ResourceName]uint64, error) { | ||||
| func getRequestedResources(pod *v1.Pod, container *v1.Container) (map[v1.ResourceName]uint64, error) { | ||||
| 	requestedResources := map[v1.ResourceName]uint64{} | ||||
| 	for resourceName, quantity := range container.Resources.Requests { | ||||
| 	resources := container.Resources.Requests | ||||
| 	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { | ||||
| 		if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok { | ||||
| 			resources = cs.ResourcesAllocated | ||||
| 		} | ||||
| 	} | ||||
| 	for resourceName, quantity := range resources { | ||||
| 		if resourceName != v1.ResourceMemory && !corehelper.IsHugePageResourceName(resourceName) { | ||||
| 			continue | ||||
| 		} | ||||
|   | ||||
| @@ -120,6 +120,35 @@ func (m *podContainerManagerImpl) GetPodContainerName(pod *v1.Pod) (CgroupName, | ||||
| 	return cgroupName, cgroupfsName | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerImpl) GetPodCgroupMemoryUsage(pod *v1.Pod) (uint64, error) { | ||||
| 	podCgroupName, _ := m.GetPodContainerName(pod) | ||||
| 	memUsage, err := m.cgroupManager.MemoryUsage(podCgroupName) | ||||
| 	if err != nil { | ||||
| 		return 0, err | ||||
| 	} | ||||
| 	return uint64(memUsage), nil | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerImpl) GetPodCgroupMemoryConfig(pod *v1.Pod) (uint64, error) { | ||||
| 	podCgroupName, _ := m.GetPodContainerName(pod) | ||||
| 	return m.cgroupManager.GetCgroupMemoryConfig(podCgroupName) | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerImpl) GetPodCgroupCpuConfig(pod *v1.Pod) (int64, uint64, uint64, error) { | ||||
| 	podCgroupName, _ := m.GetPodContainerName(pod) | ||||
| 	return m.cgroupManager.GetCgroupCpuConfig(podCgroupName) | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerImpl) SetPodCgroupMemoryConfig(pod *v1.Pod, memoryLimit int64) error { | ||||
| 	podCgroupName, _ := m.GetPodContainerName(pod) | ||||
| 	return m.cgroupManager.SetCgroupMemoryConfig(podCgroupName, memoryLimit) | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerImpl) SetPodCgroupCpuConfig(pod *v1.Pod, cpuQuota *int64, cpuPeriod, cpuShares *uint64) error { | ||||
| 	podCgroupName, _ := m.GetPodContainerName(pod) | ||||
| 	return m.cgroupManager.SetCgroupCpuConfig(podCgroupName, cpuQuota, cpuPeriod, cpuShares) | ||||
| } | ||||
|  | ||||
| // Kill one process ID | ||||
| func (m *podContainerManagerImpl) killOnePid(pid int) error { | ||||
| 	// os.FindProcess never returns an error on POSIX | ||||
| @@ -322,3 +351,23 @@ func (m *podContainerManagerNoop) GetAllPodsFromCgroups() (map[types.UID]CgroupN | ||||
| func (m *podContainerManagerNoop) IsPodCgroup(cgroupfs string) (bool, types.UID) { | ||||
| 	return false, types.UID("") | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerNoop) GetPodCgroupMemoryUsage(_ *v1.Pod) (uint64, error) { | ||||
| 	return 0, nil | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerNoop) GetPodCgroupMemoryConfig(_ *v1.Pod) (uint64, error) { | ||||
| 	return 0, nil | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerNoop) GetPodCgroupCpuConfig(_ *v1.Pod) (int64, uint64, uint64, error) { | ||||
| 	return 0, 0, 0, nil | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerNoop) SetPodCgroupMemoryConfig(_ *v1.Pod, _ int64) error { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerNoop) SetPodCgroupCpuConfig(_ *v1.Pod, _ *int64, _, _ *uint64) error { | ||||
| 	return nil | ||||
| } | ||||
|   | ||||
| @@ -53,3 +53,23 @@ func (m *podContainerManagerStub) GetAllPodsFromCgroups() (map[types.UID]CgroupN | ||||
| func (m *podContainerManagerStub) IsPodCgroup(cgroupfs string) (bool, types.UID) { | ||||
| 	return false, types.UID("") | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerStub) GetPodCgroupMemoryUsage(_ *v1.Pod) (uint64, error) { | ||||
| 	return 0, nil | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerStub) GetPodCgroupMemoryLimit(_ *v1.Pod) (uint64, error) { | ||||
| 	return 0, nil | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerStub) GetPodCgroupCpuLimit(_ *v1.Pod) (int64, uint64, uint64, error) { | ||||
| 	return 0, 0, 0, nil | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerStub) SetPodCgroupMemoryLimit(_ *v1.Pod, _ int64) error { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (m *podContainerManagerStub) SetPodCgroupCpuLimit(_ *v1.Pod, _ *int64, _, _ *uint64) error { | ||||
| 	return nil | ||||
| } | ||||
|   | ||||
| @@ -84,6 +84,14 @@ type CgroupManager interface { | ||||
| 	ReduceCPULimits(cgroupName CgroupName) error | ||||
| 	// MemoryUsage returns current memory usage of the specified cgroup, as read from the cgroupfs. | ||||
| 	MemoryUsage(name CgroupName) (int64, error) | ||||
| 	// GetCgroupMemoryConfig returns the memory limit of the specified cgroup as read from cgroup fs. | ||||
| 	GetCgroupMemoryConfig(name CgroupName) (uint64, error) | ||||
| 	// GetCgroupCpuConfig returns the cpu quota, cpu period, and cpu shares of the specified cgroup as read from cgroup fs. | ||||
| 	GetCgroupCpuConfig(name CgroupName) (int64, uint64, uint64, error) | ||||
| 	// SetCgroupMemoryConfig sets the memory limit of the specified cgroup. | ||||
| 	SetCgroupMemoryConfig(name CgroupName, memoryLimit int64) error | ||||
| 	// SetCgroupCpuConfig sets the cpu quota, cpu period, and cpu shares of the specified cgroup. | ||||
| 	SetCgroupCpuConfig(name CgroupName, cpuQuota *int64, cpuPeriod, cpuShares *uint64) error | ||||
| } | ||||
|  | ||||
| // QOSContainersInfo stores the names of containers per qos | ||||
| @@ -119,4 +127,19 @@ type PodContainerManager interface { | ||||
|  | ||||
| 	// IsPodCgroup returns true if the literal cgroupfs name corresponds to a pod | ||||
| 	IsPodCgroup(cgroupfs string) (bool, types.UID) | ||||
|  | ||||
| 	// Get value of memory.usage_in_bytes for the pod Cgroup | ||||
| 	GetPodCgroupMemoryUsage(pod *v1.Pod) (uint64, error) | ||||
|  | ||||
| 	// Get value of memory.limit_in_bytes for the pod Cgroup | ||||
| 	GetPodCgroupMemoryConfig(pod *v1.Pod) (uint64, error) | ||||
|  | ||||
| 	// Get values of cpu.cfs_quota_us, cpu.cfs_period_us, and cpu.shares for the pod Cgroup | ||||
| 	GetPodCgroupCpuConfig(pod *v1.Pod) (int64, uint64, uint64, error) | ||||
|  | ||||
| 	// Set value of memory.limit_in_bytes for the pod Cgroup | ||||
| 	SetPodCgroupMemoryConfig(pod *v1.Pod, memoryLimit int64) error | ||||
|  | ||||
| 	// Set values of cpu.cfs_quota_us, cpu.cfs_period_us, and cpu.shares for the pod Cgroup | ||||
| 	SetPodCgroupCpuConfig(pod *v1.Pod, cpuQuota *int64, cpuPeriod, cpuShares *uint64) error | ||||
| } | ||||
|   | ||||
| @@ -117,6 +117,23 @@ func HashContainer(container *v1.Container) uint64 { | ||||
| 	return uint64(hash.Sum32()) | ||||
| } | ||||
|  | ||||
| // HashContainerWithoutResources returns the hash of the container with Resources field zero'd out. | ||||
| func HashContainerWithoutResources(container *v1.Container) uint64 { | ||||
| 	// InPlacePodVerticalScaling enables mutable Resources field. | ||||
| 	// Changes to this field may not require container restart depending on policy. | ||||
| 	// Compute hash over fields besides the Resources field | ||||
| 	// NOTE: This is needed during alpha and beta so that containers using Resources but | ||||
| 	//       not subject to In-place resize are not unexpectedly restarted when | ||||
| 	//       InPlacePodVerticalScaling feature-gate is toggled. | ||||
| 	//TODO(vinaykul,InPlacePodVerticalScaling): Remove this in GA+1 and make HashContainerWithoutResources the new Hash. | ||||
| 	hashWithoutResources := fnv.New32a() | ||||
| 	containerCopy := container.DeepCopy() | ||||
| 	containerCopy.Resources = v1.ResourceRequirements{} | ||||
| 	containerJSON, _ := json.Marshal(containerCopy) | ||||
| 	hashutil.DeepHashObject(hashWithoutResources, containerJSON) | ||||
| 	return uint64(hashWithoutResources.Sum32()) | ||||
| } | ||||
|  | ||||
| // envVarsToMap constructs a map of environment name to value from a slice | ||||
| // of env vars. | ||||
| func envVarsToMap(envs []EnvVar) map[string]string { | ||||
| @@ -252,12 +269,13 @@ func ConvertPodStatusToRunningPod(runtimeName string, podStatus *PodStatus) Pod | ||||
| 			continue | ||||
| 		} | ||||
| 		container := &Container{ | ||||
| 			ID:      containerStatus.ID, | ||||
| 			Name:    containerStatus.Name, | ||||
| 			Image:   containerStatus.Image, | ||||
| 			ImageID: containerStatus.ImageID, | ||||
| 			Hash:    containerStatus.Hash, | ||||
| 			State:   containerStatus.State, | ||||
| 			ID:                   containerStatus.ID, | ||||
| 			Name:                 containerStatus.Name, | ||||
| 			Image:                containerStatus.Image, | ||||
| 			ImageID:              containerStatus.ImageID, | ||||
| 			Hash:                 containerStatus.Hash, | ||||
| 			HashWithoutResources: containerStatus.HashWithoutResources, | ||||
| 			State:                containerStatus.State, | ||||
| 		} | ||||
| 		runningPod.Containers = append(runningPod.Containers, container) | ||||
| 	} | ||||
|   | ||||
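A simplified, self-contained illustration of the idea behind HashContainerWithoutResources above: zero out Resources before hashing, so that two containers differing only in Resources produce the same hash, which is what keeps containers from being restarted when the InPlacePodVerticalScaling feature gate is toggled. The real helper additionally runs the marshaled JSON through hashutil.DeepHashObject; that step is omitted here, so the resulting values differ from the kubelet's.

package main

import (
	"encoding/json"
	"fmt"
	"hash/fnv"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// hashWithoutResources hashes a container with its Resources field zeroed out.
func hashWithoutResources(c *v1.Container) uint64 {
	cc := c.DeepCopy()
	cc.Resources = v1.ResourceRequirements{}
	data, _ := json.Marshal(cc)
	h := fnv.New32a()
	h.Write(data)
	return uint64(h.Sum32())
}

func main() {
	a := v1.Container{
		Name:  "foo",
		Image: "bar",
		Resources: v1.ResourceRequirements{
			Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m")},
		},
	}
	b := *a.DeepCopy()
	b.Resources.Requests[v1.ResourceCPU] = resource.MustParse("200m")
	// Same hash despite different CPU requests.
	fmt.Println(hashWithoutResources(&a) == hashWithoutResources(&b)) // true
}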
| @@ -25,6 +25,7 @@ import ( | ||||
| 	"github.com/stretchr/testify/assert" | ||||
|  | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	"k8s.io/apimachinery/pkg/api/resource" | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| ) | ||||
|  | ||||
| @@ -908,3 +909,83 @@ func TestHasWindowsHostProcessContainer(t *testing.T) { | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestHashContainerWithoutResources(t *testing.T) { | ||||
| 	cpu100m := resource.MustParse("100m") | ||||
| 	cpu200m := resource.MustParse("200m") | ||||
| 	mem100M := resource.MustParse("100Mi") | ||||
| 	mem200M := resource.MustParse("200Mi") | ||||
| 	cpuPolicyRestartNotRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceCPU, Policy: v1.RestartNotRequired} | ||||
| 	memPolicyRestartNotRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, Policy: v1.RestartNotRequired} | ||||
| 	cpuPolicyRestartRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceCPU, Policy: v1.RestartRequired} | ||||
| 	memPolicyRestartRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, Policy: v1.RestartRequired} | ||||
|  | ||||
| 	type testCase struct { | ||||
| 		name         string | ||||
| 		container    *v1.Container | ||||
| 		expectedHash uint64 | ||||
| 	} | ||||
|  | ||||
| 	tests := []testCase{ | ||||
| 		{ | ||||
| 			"Burstable pod with CPU policy restart required", | ||||
| 			&v1.Container{ | ||||
| 				Name:  "foo", | ||||
| 				Image: "bar", | ||||
| 				Resources: v1.ResourceRequirements{ | ||||
| 					Limits:   v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M}, | ||||
| 					Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M}, | ||||
| 				}, | ||||
| 				ResizePolicy: []v1.ContainerResizePolicy{cpuPolicyRestartRequired, memPolicyRestartNotRequired}, | ||||
| 			}, | ||||
| 			0x86a4393c, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"Burstable pod with memory policy restart required", | ||||
| 			&v1.Container{ | ||||
| 				Name:  "foo", | ||||
| 				Image: "bar", | ||||
| 				Resources: v1.ResourceRequirements{ | ||||
| 					Limits:   v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M}, | ||||
| 					Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M}, | ||||
| 				}, | ||||
| 				ResizePolicy: []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartRequired}, | ||||
| 			}, | ||||
| 			0x73a18cce, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"Guaranteed pod with CPU policy restart required", | ||||
| 			&v1.Container{ | ||||
| 				Name:  "foo", | ||||
| 				Image: "bar", | ||||
| 				Resources: v1.ResourceRequirements{ | ||||
| 					Limits:   v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M}, | ||||
| 					Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M}, | ||||
| 				}, | ||||
| 				ResizePolicy: []v1.ContainerResizePolicy{cpuPolicyRestartRequired, memPolicyRestartNotRequired}, | ||||
| 			}, | ||||
| 			0x86a4393c, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"Guaranteed pod with memory policy restart required", | ||||
| 			&v1.Container{ | ||||
| 				Name:  "foo", | ||||
| 				Image: "bar", | ||||
| 				Resources: v1.ResourceRequirements{ | ||||
| 					Limits:   v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M}, | ||||
| 					Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M}, | ||||
| 				}, | ||||
| 				ResizePolicy: []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartRequired}, | ||||
| 			}, | ||||
| 			0x73a18cce, | ||||
| 		}, | ||||
| 	} | ||||
| 	for _, tc := range tests { | ||||
| 		t.Run(tc.name, func(t *testing.T) { | ||||
| 			containerCopy := tc.container.DeepCopy() | ||||
| 			hash := HashContainerWithoutResources(tc.container) | ||||
| 			assert.Equal(t, tc.expectedHash, hash, "[%s]", tc.name) | ||||
| 			assert.Equal(t, containerCopy, tc.container, "[%s]", tc.name) | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
|   | ||||
| @@ -27,6 +27,7 @@ import ( | ||||
| 	"time" | ||||
|  | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	"k8s.io/apimachinery/pkg/api/resource" | ||||
| 	"k8s.io/apimachinery/pkg/types" | ||||
| 	"k8s.io/client-go/tools/remotecommand" | ||||
| 	"k8s.io/client-go/util/flowcontrol" | ||||
| @@ -295,6 +296,11 @@ type Container struct { | ||||
| 	// Hash of the container, used for comparison. Optional for containers | ||||
| 	// not managed by kubelet. | ||||
| 	Hash uint64 | ||||
| 	// Hash of the container over fields with Resources field zero'd out. | ||||
| 	// NOTE: This is needed during alpha and beta so that containers using Resources are | ||||
| 	// not unexpectedly restarted when InPlacePodVerticalScaling feature-gate is toggled. | ||||
| 	//TODO(vinaykul,InPlacePodVerticalScaling): Remove this in GA+1 and make HashWithoutResources the new Hash. | ||||
| 	HashWithoutResources uint64 | ||||
| 	// State is the state of the container. | ||||
| 	State State | ||||
| } | ||||
| @@ -319,6 +325,18 @@ type PodStatus struct { | ||||
| 	TimeStamp time.Time | ||||
| } | ||||
|  | ||||
| // ContainerResources represents the Resources allocated to the running container. | ||||
| type ContainerResources struct { | ||||
| 	// CPU capacity reserved for the container (cpu.shares) | ||||
| 	CPURequest *resource.Quantity | ||||
| 	// CPU limit enforced on the container (cpu.cfs_quota_us) | ||||
| 	CPULimit *resource.Quantity | ||||
| 	// Memory capacity reserved for the container | ||||
| 	MemoryRequest *resource.Quantity | ||||
| 	// Memory limit enforced on the container (memory.limit_in_bytes) | ||||
| 	MemoryLimit *resource.Quantity | ||||
| } | ||||
|  | ||||
| // Status represents the status of a container. | ||||
| type Status struct { | ||||
| 	// ID of the container. | ||||
| @@ -342,6 +360,8 @@ type Status struct { | ||||
| 	ImageID string | ||||
| 	// Hash of the container, used for comparison. | ||||
| 	Hash uint64 | ||||
| 	// Hash of the container over fields with Resources field zero'd out. | ||||
| 	HashWithoutResources uint64 | ||||
| 	// Number of times that the container has been restarted. | ||||
| 	RestartCount int | ||||
| 	// A string explains why container is in such a status. | ||||
| @@ -349,6 +369,8 @@ type Status struct { | ||||
| 	// Message written by the container before exiting (stored in | ||||
| 	// TerminationMessagePath). | ||||
| 	Message string | ||||
| 	// CPU and memory resources for this container | ||||
| 	Resources *ContainerResources | ||||
| } | ||||
|  | ||||
| // FindContainerStatusByName returns container status in the pod status with the given name. | ||||
|   | ||||
| @@ -25,10 +25,13 @@ import ( | ||||
|  | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	"k8s.io/apimachinery/pkg/api/resource" | ||||
| 	utilfeature "k8s.io/apiserver/pkg/util/feature" | ||||
| 	corev1helpers "k8s.io/component-helpers/scheduling/corev1" | ||||
| 	"k8s.io/klog/v2" | ||||
| 	statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" | ||||
| 	podutil "k8s.io/kubernetes/pkg/api/v1/pod" | ||||
| 	v1resource "k8s.io/kubernetes/pkg/api/v1/resource" | ||||
| 	"k8s.io/kubernetes/pkg/features" | ||||
| 	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" | ||||
| 	kubetypes "k8s.io/kubernetes/pkg/kubelet/types" | ||||
| 	volumeutils "k8s.io/kubernetes/pkg/volume/util" | ||||
| @@ -1018,6 +1021,12 @@ func evictionMessage(resourceToReclaim v1.ResourceName, pod *v1.Pod, stats stats | ||||
| 		for _, container := range pod.Spec.Containers { | ||||
| 			if container.Name == containerStats.Name { | ||||
| 				requests := container.Resources.Requests[resourceToReclaim] | ||||
| 				if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) && | ||||
| 					(resourceToReclaim == v1.ResourceMemory || resourceToReclaim == v1.ResourceCPU) { | ||||
| 					if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok { | ||||
| 						requests = cs.ResourcesAllocated[resourceToReclaim] | ||||
| 					} | ||||
| 				} | ||||
| 				var usage *resource.Quantity | ||||
| 				switch resourceToReclaim { | ||||
| 				case v1.ResourceEphemeralStorage: | ||||
|   | ||||
| @@ -21,6 +21,7 @@ import ( | ||||
| 	"fmt" | ||||
| 	"reflect" | ||||
| 	"sort" | ||||
| 	"strings" | ||||
| 	"testing" | ||||
| 	"time" | ||||
|  | ||||
| @@ -2121,3 +2122,51 @@ func (s1 thresholdList) Equal(s2 thresholdList) bool { | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| func TestEvictonMessageWithResourceResize(t *testing.T) { | ||||
| 	testpod := newPod("testpod", 1, []v1.Container{ | ||||
| 		newContainer("testcontainer", newResourceList("", "200Mi", ""), newResourceList("", "", "")), | ||||
| 	}, nil) | ||||
| 	testpod.Status = v1.PodStatus{ | ||||
| 		ContainerStatuses: []v1.ContainerStatus{ | ||||
| 			{ | ||||
| 				Name:               "testcontainer", | ||||
| 				ResourcesAllocated: newResourceList("", "100Mi", ""), | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
| 	testpodMemory := resource.MustParse("150Mi") | ||||
| 	testpodStats := newPodMemoryStats(testpod, testpodMemory) | ||||
| 	testpodMemoryBytes := uint64(testpodMemory.Value()) | ||||
| 	testpodStats.Containers = []statsapi.ContainerStats{ | ||||
| 		{ | ||||
| 			Name: "testcontainer", | ||||
| 			Memory: &statsapi.MemoryStats{ | ||||
| 				WorkingSetBytes: &testpodMemoryBytes, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
| 	stats := map[*v1.Pod]statsapi.PodStats{ | ||||
| 		testpod: testpodStats, | ||||
| 	} | ||||
| 	statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) { | ||||
| 		result, found := stats[pod] | ||||
| 		return result, found | ||||
| 	} | ||||
|  | ||||
| 	for _, enabled := range []bool{true, false} { | ||||
| 		t.Run(fmt.Sprintf("InPlacePodVerticalScaling enabled=%v", enabled), func(t *testing.T) { | ||||
| 			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, enabled)() | ||||
| 			msg, _ := evictionMessage(v1.ResourceMemory, testpod, statsFn) | ||||
| 			if enabled { | ||||
| 				if !strings.Contains(msg, "testcontainer was using 150Mi, which exceeds its request of 100Mi") { | ||||
| 					t.Errorf("Expected 'exceeds memory' eviction message was not found.") | ||||
| 				} | ||||
| 			} else { | ||||
| 				if strings.Contains(msg, "which exceeds its request") { | ||||
| 					t.Errorf("Found 'exceeds memory' eviction message which was not expected.") | ||||
| 				} | ||||
| 			} | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
|   | ||||
| @@ -48,6 +48,7 @@ import ( | ||||
| 	"k8s.io/apimachinery/pkg/fields" | ||||
| 	"k8s.io/apimachinery/pkg/labels" | ||||
| 	"k8s.io/apimachinery/pkg/types" | ||||
| 	"k8s.io/apimachinery/pkg/util/diff" | ||||
| 	utilruntime "k8s.io/apimachinery/pkg/util/runtime" | ||||
| 	"k8s.io/apimachinery/pkg/util/sets" | ||||
| 	"k8s.io/apimachinery/pkg/util/wait" | ||||
| @@ -66,6 +67,8 @@ import ( | ||||
| 	"k8s.io/klog/v2" | ||||
| 	pluginwatcherapi "k8s.io/kubelet/pkg/apis/pluginregistration/v1" | ||||
| 	statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" | ||||
| 	podutil "k8s.io/kubernetes/pkg/api/v1/pod" | ||||
| 	"k8s.io/kubernetes/pkg/api/v1/resource" | ||||
| 	"k8s.io/kubernetes/pkg/features" | ||||
| 	kubeletconfiginternal "k8s.io/kubernetes/pkg/kubelet/apis/config" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/apis/podresources" | ||||
| @@ -608,7 +611,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration, | ||||
| 	mirrorPodClient := kubepod.NewBasicMirrorClient(klet.kubeClient, string(nodeName), nodeLister) | ||||
| 	klet.podManager = kubepod.NewBasicPodManager(mirrorPodClient) | ||||
|  | ||||
| 	klet.statusManager = status.NewManager(klet.kubeClient, klet.podManager, klet, kubeDeps.PodStartupLatencyTracker) | ||||
| 	klet.statusManager = status.NewManager(klet.kubeClient, klet.podManager, klet, kubeDeps.PodStartupLatencyTracker, klet.getRootDir()) | ||||
|  | ||||
| 	klet.resourceAnalyzer = serverstats.NewResourceAnalyzer(klet, kubeCfg.VolumeStatsAggPeriod.Duration, kubeDeps.Recorder) | ||||
|  | ||||
| @@ -665,7 +668,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration, | ||||
| 		kubeCfg.CPUCFSQuotaPeriod, | ||||
| 		kubeDeps.RemoteRuntimeService, | ||||
| 		kubeDeps.RemoteImageService, | ||||
| 		kubeDeps.ContainerManager.InternalContainerLifecycle(), | ||||
| 		kubeDeps.ContainerManager, | ||||
| 		klet.containerLogManager, | ||||
| 		klet.runtimeClassManager, | ||||
| 		seccompDefault, | ||||
| @@ -1247,6 +1250,9 @@ type Kubelet struct { | ||||
|  | ||||
| 	// Manage user namespaces | ||||
| 	usernsManager *usernsManager | ||||
|  | ||||
| 	// Mutex to serialize new pod admission and existing pod resizing | ||||
| 	podResizeMutex sync.Mutex | ||||
| } | ||||
|  | ||||
| // ListPodStats is delegated to StatsProvider, which implements stats.Provider interface | ||||
| @@ -1826,6 +1832,16 @@ func (kl *Kubelet) syncPod(_ context.Context, updateType kubetypes.SyncPodType, | ||||
| 	// Ensure the pod is being probed | ||||
| 	kl.probeManager.AddPod(pod) | ||||
|  | ||||
| 	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { | ||||
| 		// Handle pod resize here instead of doing it in HandlePodUpdates because | ||||
| 		// this conveniently retries any Deferred resize requests | ||||
| 		// TODO(vinaykul,InPlacePodVerticalScaling): Investigate doing this in HandlePodUpdates + periodic SyncLoop scan | ||||
| 		//     See: https://github.com/kubernetes/kubernetes/pull/102884#discussion_r663160060 | ||||
| 		if kl.podWorkers.CouldHaveRunningContainers(pod.UID) && !kubetypes.IsStaticPod(pod) { | ||||
| 			kl.handlePodResourcesResize(pod) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Call the container runtime's SyncPod callback | ||||
| 	result := kl.containerRuntime.SyncPod(ctx, pod, podStatus, pullSecrets, kl.backOff) | ||||
| 	kl.reasonCache.Update(pod.UID, result) | ||||
| @@ -1842,6 +1858,15 @@ func (kl *Kubelet) syncPod(_ context.Context, updateType kubetypes.SyncPodType, | ||||
| 		return false, nil | ||||
| 	} | ||||
|  | ||||
| 	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) && isPodResizeInProgress(pod, &apiPodStatus) { | ||||
| 		// While resize is in progress, periodically call PLEG to update pod cache | ||||
| 		runningPod := kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), podStatus) | ||||
| 		if err := kl.pleg.UpdateCache(&runningPod, pod.UID); err != nil { | ||||
| 			klog.ErrorS(err, "Failed to update pod cache", "pod", klog.KObj(pod)) | ||||
| 			return false, err | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return false, nil | ||||
| } | ||||
|  | ||||
| @@ -2078,6 +2103,23 @@ func (kl *Kubelet) canAdmitPod(pods []*v1.Pod, pod *v1.Pod) (bool, string, strin | ||||
| 	// TODO: move out of disk check into a pod admitter | ||||
| 	// TODO: out of resource eviction should have a pod admitter call-out | ||||
| 	attrs := &lifecycle.PodAdmitAttributes{Pod: pod, OtherPods: pods} | ||||
| 	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { | ||||
| 		// Use allocated resources values from checkpoint store (source of truth) to determine fit | ||||
| 		otherPods := make([]*v1.Pod, 0, len(pods)) | ||||
| 		checkpointState := kl.statusManager.State() | ||||
| 		for _, p := range pods { | ||||
| 			op := p.DeepCopy() | ||||
| 			for _, c := range op.Spec.Containers { | ||||
| 				resourcesAllocated, found := checkpointState.GetContainerResourceAllocation(string(p.UID), c.Name) | ||||
| 				if c.Resources.Requests != nil && found { | ||||
| 					c.Resources.Requests[v1.ResourceCPU] = resourcesAllocated[v1.ResourceCPU] | ||||
| 					c.Resources.Requests[v1.ResourceMemory] = resourcesAllocated[v1.ResourceMemory] | ||||
| 				} | ||||
| 			} | ||||
| 			otherPods = append(otherPods, op) | ||||
| 		} | ||||
| 		attrs.OtherPods = otherPods | ||||
| 	} | ||||
| 	for _, podAdmitHandler := range kl.admitHandlers { | ||||
| 		if result := podAdmitHandler.Admit(attrs); !result.Admit { | ||||
| 			return false, result.Reason, result.Message | ||||
| @@ -2332,6 +2374,10 @@ func (kl *Kubelet) handleMirrorPod(mirrorPod *v1.Pod, start time.Time) { | ||||
| func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) { | ||||
| 	start := kl.clock.Now() | ||||
| 	sort.Sort(sliceutils.PodsByCreationTime(pods)) | ||||
| 	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { | ||||
| 		kl.podResizeMutex.Lock() | ||||
| 		defer kl.podResizeMutex.Unlock() | ||||
| 	} | ||||
| 	for _, pod := range pods { | ||||
| 		existingPods := kl.podManager.GetPods() | ||||
| 		// Always add the pod to the pod manager. Kubelet relies on the pod | ||||
| @@ -2356,10 +2402,36 @@ func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) { | ||||
| 			// pods that are alive. | ||||
| 			activePods := kl.filterOutInactivePods(existingPods) | ||||
|  | ||||
| 			// Check if we can admit the pod; if not, reject it. | ||||
| 			if ok, reason, message := kl.canAdmitPod(activePods, pod); !ok { | ||||
| 				kl.rejectPod(pod, reason, message) | ||||
| 				continue | ||||
| 			if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { | ||||
| 				// To handle kubelet restarts, test pod admissibility using ResourcesAllocated values | ||||
| 				// (for cpu & memory) from checkpoint store. If found, that is the source of truth. | ||||
| 				checkpointState := kl.statusManager.State() | ||||
| 				podCopy := pod.DeepCopy() | ||||
| 				for _, c := range podCopy.Spec.Containers { | ||||
| 					resourcesAllocated, found := checkpointState.GetContainerResourceAllocation(string(pod.UID), c.Name) | ||||
| 					if c.Resources.Requests != nil && found { | ||||
| 						c.Resources.Requests[v1.ResourceCPU] = resourcesAllocated[v1.ResourceCPU] | ||||
| 						c.Resources.Requests[v1.ResourceMemory] = resourcesAllocated[v1.ResourceMemory] | ||||
| 					} | ||||
| 				} | ||||
|  | ||||
| 				// Check if we can admit the pod; if not, reject it. | ||||
| 				if ok, reason, message := kl.canAdmitPod(activePods, podCopy); !ok { | ||||
| 					kl.rejectPod(pod, reason, message) | ||||
| 					continue | ||||
| 				} | ||||
|  | ||||
| 				// For new pod, checkpoint the resource values at which the Pod has been admitted | ||||
| 				if err := kl.statusManager.SetPodAllocation(podCopy); err != nil { | ||||
| 					//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate | ||||
| 					klog.ErrorS(err, "SetPodAllocation failed", "pod", klog.KObj(pod)) | ||||
| 				} | ||||
| 			} else { | ||||
| 				// Check if we can admit the pod; if not, reject it. | ||||
| 				if ok, reason, message := kl.canAdmitPod(activePods, pod); !ok { | ||||
| 					kl.rejectPod(pod, reason, message) | ||||
| 					continue | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod) | ||||
| @@ -2434,6 +2506,116 @@ func (kl *Kubelet) HandlePodSyncs(pods []*v1.Pod) { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func isPodResizeInProgress(pod *v1.Pod, podStatus *v1.PodStatus) bool { | ||||
| 	for _, c := range pod.Spec.Containers { | ||||
| 		if cs, ok := podutil.GetContainerStatus(podStatus.ContainerStatuses, c.Name); ok { | ||||
| 			if cs.Resources == nil { | ||||
| 				continue | ||||
| 			} | ||||
| 			if diff.ObjectDiff(c.Resources.Limits, cs.Resources.Limits) != "" || | ||||
| 				diff.ObjectDiff(cs.ResourcesAllocated, cs.Resources.Requests) != "" { | ||||
| 				return true | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return false | ||||
| } | ||||
|  | ||||
| func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, *v1.Pod, v1.PodResizeStatus) { | ||||
| 	var otherActivePods []*v1.Pod | ||||
|  | ||||
| 	node, err := kl.getNodeAnyWay() | ||||
| 	if err != nil { | ||||
| 		klog.ErrorS(err, "getNodeAnyway function failed") | ||||
| 		return false, nil, "" | ||||
| 	} | ||||
| 	cpuAvailable := node.Status.Allocatable.Cpu().MilliValue() | ||||
| 	memAvailable := node.Status.Allocatable.Memory().Value() | ||||
| 	cpuRequests := resource.GetResourceRequest(pod, v1.ResourceCPU) | ||||
| 	memRequests := resource.GetResourceRequest(pod, v1.ResourceMemory) | ||||
| 	if cpuRequests > cpuAvailable || memRequests > memAvailable { | ||||
| 		klog.V(3).InfoS("Resize is not feasible as request exceeds allocatable node resources", "Pod", pod.Name) | ||||
| 		return false, nil, v1.PodResizeStatusInfeasible | ||||
| 	} | ||||
|  | ||||
| 	// Treat the existing pod needing resize as a new pod with desired resources seeking admit. | ||||
| 	// If desired resources don't fit, pod continues to run with currently allocated resources. | ||||
| 	activePods := kl.GetActivePods() | ||||
| 	for _, p := range activePods { | ||||
| 		if p.UID != pod.UID { | ||||
| 			otherActivePods = append(otherActivePods, p) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if ok, failReason, failMessage := kl.canAdmitPod(otherActivePods, pod); !ok { | ||||
| 		// Log reason and return. Let the next sync iteration retry the resize | ||||
| 		klog.V(3).InfoS("Resize cannot be accommodated", "Pod", pod.Name, "Reason", failReason, "Message", failMessage) | ||||
| 		return false, nil, v1.PodResizeStatusDeferred | ||||
| 	} | ||||
|  | ||||
| 	podCopy := pod.DeepCopy() | ||||
| 	for _, container := range podCopy.Spec.Containers { | ||||
| 		idx, found := podutil.GetIndexOfContainerStatus(podCopy.Status.ContainerStatuses, container.Name) | ||||
| 		if found { | ||||
| 			for rName, rQuantity := range container.Resources.Requests { | ||||
| 				podCopy.Status.ContainerStatuses[idx].ResourcesAllocated[rName] = rQuantity | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return true, podCopy, v1.PodResizeStatusInProgress | ||||
| } | ||||
|  | ||||
| func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod) { | ||||
| 	if pod.Status.Phase != v1.PodRunning { | ||||
| 		return | ||||
| 	} | ||||
| 	podResized := false | ||||
| 	for _, container := range pod.Spec.Containers { | ||||
| 		if len(container.Resources.Requests) == 0 { | ||||
| 			continue | ||||
| 		} | ||||
| 		containerStatus, found := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name) | ||||
| 		if !found { | ||||
| 			klog.V(5).InfoS("ContainerStatus not found", "pod", pod.Name, "container", container.Name) | ||||
| 			break | ||||
| 		} | ||||
| 		if len(containerStatus.ResourcesAllocated) != len(container.Resources.Requests) { | ||||
| 			klog.V(5).InfoS("ContainerStatus.ResourcesAllocated length mismatch", "pod", pod.Name, "container", container.Name) | ||||
| 			break | ||||
| 		} | ||||
| 		if len(diff.ObjectDiff(container.Resources.Requests, containerStatus.ResourcesAllocated)) > 0 { | ||||
| 			podResized = true | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	if !podResized { | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	kl.podResizeMutex.Lock() | ||||
| 	defer kl.podResizeMutex.Unlock() | ||||
| 	fit, updatedPod, resizeStatus := kl.canResizePod(pod) | ||||
| 	if fit { | ||||
| 		// Update pod resource allocation checkpoint | ||||
| 		if err := kl.statusManager.SetPodAllocation(updatedPod); err != nil { | ||||
| 			//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate | ||||
| 			klog.ErrorS(err, "SetPodAllocation failed", "pod", klog.KObj(pod)) | ||||
| 		} | ||||
| 		*pod = *updatedPod | ||||
| 	} | ||||
| 	if resizeStatus != "" { | ||||
| 		// Save resize decision to checkpoint | ||||
| 		if err := kl.statusManager.SetPodResizeStatus(pod.UID, resizeStatus); err != nil { | ||||
| 			//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate | ||||
| 			klog.ErrorS(err, "SetPodResizeStatus failed", "pod", klog.KObj(pod)) | ||||
| 		} | ||||
| 		pod.Status.Resize = resizeStatus | ||||
| 	} | ||||
| 	kl.podManager.UpdatePod(pod) | ||||
| 	kl.statusManager.SetPodStatus(pod, pod.Status) | ||||
| 	return | ||||
| } | ||||
|  | ||||
| // LatestLoopEntryTime returns the last time in the sync loop monitor. | ||||
| func (kl *Kubelet) LatestLoopEntryTime() time.Time { | ||||
| 	val := kl.syncLoopMonitor.Load() | ||||
|   | ||||
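A hedged, simplified sketch of the check at the top of handlePodResourcesResize above: a resize is pending when a container's desired requests differ from what is recorded in ContainerStatus.ResourcesAllocated (the alpha field name used throughout this diff). The kubelet also handles missing statuses, length mismatches, and the checkpoint store; this standalone version only compares quantities.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// pendingResize reports whether any container's desired requests differ from
// the requests recorded as allocated in its container status.
func pendingResize(pod *v1.Pod) bool {
	for _, c := range pod.Spec.Containers {
		for _, cs := range pod.Status.ContainerStatuses {
			if cs.Name != c.Name {
				continue
			}
			for name, want := range c.Resources.Requests {
				if got, ok := cs.ResourcesAllocated[name]; !ok || got.Cmp(want) != 0 {
					return true
				}
			}
		}
	}
	return false
}

func main() {
	pod := &v1.Pod{
		Spec: v1.PodSpec{Containers: []v1.Container{{
			Name: "app",
			Resources: v1.ResourceRequirements{Requests: v1.ResourceList{
				v1.ResourceCPU: resource.MustParse("200m"),
			}},
		}}},
		Status: v1.PodStatus{ContainerStatuses: []v1.ContainerStatus{{
			Name:               "app",
			ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m")},
		}}},
	}
	fmt.Println(pendingResize(pod)) // true: spec asks for 200m, only 100m is allocated
}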
| @@ -34,6 +34,7 @@ import ( | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/apimachinery/pkg/labels" | ||||
| 	"k8s.io/apimachinery/pkg/types" | ||||
| 	"k8s.io/apimachinery/pkg/util/diff" | ||||
| 	"k8s.io/apimachinery/pkg/util/sets" | ||||
| 	utilvalidation "k8s.io/apimachinery/pkg/util/validation" | ||||
| 	utilfeature "k8s.io/apiserver/pkg/util/feature" | ||||
| @@ -1454,6 +1455,31 @@ func getPhase(spec *v1.PodSpec, info []v1.ContainerStatus) v1.PodPhase { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (kl *Kubelet) determinePodResizeStatus(pod *v1.Pod, podStatus *v1.PodStatus) v1.PodResizeStatus { | ||||
| 	var podResizeStatus v1.PodResizeStatus | ||||
| 	specStatusDiffer := false | ||||
| 	for _, c := range pod.Spec.Containers { | ||||
| 		if cs, ok := podutil.GetContainerStatus(podStatus.ContainerStatuses, c.Name); ok { | ||||
| 			if cs.Resources != nil && diff.ObjectDiff(c.Resources, *cs.Resources) != "" { | ||||
| 				specStatusDiffer = true | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	if !specStatusDiffer { | ||||
| 		// Clear last resize state from checkpoint | ||||
| 		if err := kl.statusManager.SetPodResizeStatus(pod.UID, ""); err != nil { | ||||
| 			klog.ErrorS(err, "SetPodResizeStatus failed", "pod", pod.Name) | ||||
| 		} | ||||
| 	} else { | ||||
| 		checkpointState := kl.statusManager.State() | ||||
| 		if resizeStatus, found := checkpointState.GetPodResizeStatus(string(pod.UID)); found { | ||||
| 			podResizeStatus = resizeStatus | ||||
| 		} | ||||
| 	} | ||||
| 	return podResizeStatus | ||||
| } | ||||
|  | ||||
| // generateAPIPodStatus creates the final API pod status for a pod, given the | ||||
| // internal pod status. This method should only be called from within sync*Pod methods. | ||||
| func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus) v1.PodStatus { | ||||
| @@ -1464,6 +1490,9 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po | ||||
| 		oldPodStatus = pod.Status | ||||
| 	} | ||||
| 	s := kl.convertStatusToAPIStatus(pod, podStatus, oldPodStatus) | ||||
| 	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { | ||||
| 		s.Resize = kl.determinePodResizeStatus(pod, s) | ||||
| 	} | ||||
| 	// calculate the next phase and preserve reason | ||||
| 	allStatus := append(append([]v1.ContainerStatus{}, s.ContainerStatuses...), s.InitContainerStatuses...) | ||||
| 	s.Phase = getPhase(&pod.Spec, allStatus) | ||||
| @@ -1715,6 +1744,84 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon | ||||
| 		return status | ||||
| 	} | ||||
|  | ||||
| 	convertContainerStatusResources := func(cName string, status *v1.ContainerStatus, cStatus *kubecontainer.Status, oldStatuses map[string]v1.ContainerStatus) *v1.ResourceRequirements { | ||||
| 		var requests, limits v1.ResourceList | ||||
| 		// oldStatus should always exist if container is running | ||||
| 		oldStatus, oldStatusFound := oldStatuses[cName] | ||||
| 		// Initialize limits/requests from container's spec upon transition to Running state | ||||
| 		// For cpu & memory, values queried from runtime via CRI always supersede spec values | ||||
| 		// For ephemeral-storage, a running container's status.limit/request equals spec.limit/request | ||||
| 		determineResource := func(rName v1.ResourceName, v1ContainerResource, oldStatusResource, resource v1.ResourceList) { | ||||
| 			if oldStatusFound { | ||||
| 				if oldStatus.State.Running == nil || status.ContainerID != oldStatus.ContainerID { | ||||
| 					if r, exists := v1ContainerResource[rName]; exists { | ||||
| 						resource[rName] = r.DeepCopy() | ||||
| 					} | ||||
| 				} else { | ||||
| 					if oldStatusResource != nil { | ||||
| 						if r, exists := oldStatusResource[rName]; exists { | ||||
| 							resource[rName] = r.DeepCopy() | ||||
| 						} | ||||
| 					} | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		container := kubecontainer.GetContainerSpec(pod, cName) | ||||
| 		// ResourcesAllocated values come from checkpoint. It is the source-of-truth. | ||||
| 		found := false | ||||
| 		checkpointState := kl.statusManager.State() | ||||
| 		status.ResourcesAllocated, found = checkpointState.GetContainerResourceAllocation(string(pod.UID), cName) | ||||
| 		if !(container.Resources.Requests == nil && container.Resources.Limits == nil) && !found { | ||||
| 			// Log error and fallback to ResourcesAllocated in oldStatus if it exists | ||||
| 			klog.ErrorS(nil, "resource allocation not found in checkpoint store", "pod", pod.Name, "container", cName) | ||||
| 			if oldStatusFound { | ||||
| 				status.ResourcesAllocated = oldStatus.ResourcesAllocated | ||||
| 			} | ||||
| 		} | ||||
| 		if oldStatus.Resources == nil { | ||||
| 			oldStatus.Resources = &v1.ResourceRequirements{} | ||||
| 		} | ||||
| 		// Convert Limits | ||||
| 		if container.Resources.Limits != nil { | ||||
| 			limits = make(v1.ResourceList) | ||||
| 			if cStatus.Resources != nil && cStatus.Resources.CPULimit != nil { | ||||
| 				limits[v1.ResourceCPU] = cStatus.Resources.CPULimit.DeepCopy() | ||||
| 			} else { | ||||
| 				determineResource(v1.ResourceCPU, container.Resources.Limits, oldStatus.Resources.Limits, limits) | ||||
| 			} | ||||
| 			if cStatus.Resources != nil && cStatus.Resources.MemoryLimit != nil { | ||||
| 				limits[v1.ResourceMemory] = cStatus.Resources.MemoryLimit.DeepCopy() | ||||
| 			} else { | ||||
| 				determineResource(v1.ResourceMemory, container.Resources.Limits, oldStatus.Resources.Limits, limits) | ||||
| 			} | ||||
| 			if ephemeralStorage, found := container.Resources.Limits[v1.ResourceEphemeralStorage]; found { | ||||
| 				limits[v1.ResourceEphemeralStorage] = ephemeralStorage.DeepCopy() | ||||
| 			} | ||||
| 		} | ||||
| 		// Convert Requests | ||||
| 		if status.ResourcesAllocated != nil { | ||||
| 			requests = make(v1.ResourceList) | ||||
| 			if cStatus.Resources != nil && cStatus.Resources.CPURequest != nil { | ||||
| 				requests[v1.ResourceCPU] = cStatus.Resources.CPURequest.DeepCopy() | ||||
| 			} else { | ||||
| 				determineResource(v1.ResourceCPU, status.ResourcesAllocated, oldStatus.Resources.Requests, requests) | ||||
| 			} | ||||
| 			if memory, found := status.ResourcesAllocated[v1.ResourceMemory]; found { | ||||
| 				requests[v1.ResourceMemory] = memory.DeepCopy() | ||||
| 			} | ||||
| 			if ephemeralStorage, found := status.ResourcesAllocated[v1.ResourceEphemeralStorage]; found { | ||||
| 				requests[v1.ResourceEphemeralStorage] = ephemeralStorage.DeepCopy() | ||||
| 			} | ||||
| 		} | ||||
| 		//TODO(vinaykul,derekwaynecarr,InPlacePodVerticalScaling): Update this to include extended resources in | ||||
| 		// addition to CPU, memory, ephemeral storage. Add test case for extended resources. | ||||
| 		resources := &v1.ResourceRequirements{ | ||||
| 			Limits:   limits, | ||||
| 			Requests: requests, | ||||
| 		} | ||||
| 		return resources | ||||
| 	} | ||||
|  | ||||
| 	// Fetch old containers statuses from old pod status. | ||||
| 	oldStatuses := make(map[string]v1.ContainerStatus, len(containers)) | ||||
| 	for _, status := range previousStatus { | ||||
| @@ -1835,6 +1942,11 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon | ||||
| 			oldStatusPtr = &oldStatus | ||||
| 		} | ||||
| 		status := convertContainerStatus(cStatus, oldStatusPtr) | ||||
| 		if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { | ||||
| 			if status.State.Running != nil { | ||||
| 				status.Resources = convertContainerStatusResources(cName, status, cStatus, oldStatuses) | ||||
| 			} | ||||
| 		} | ||||
| 		if containerSeen[cName] == 0 { | ||||
| 			statuses[cName] = status | ||||
| 		} else { | ||||
|   | ||||
| @@ -33,6 +33,7 @@ import ( | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	apiequality "k8s.io/apimachinery/pkg/api/equality" | ||||
| 	apierrors "k8s.io/apimachinery/pkg/api/errors" | ||||
| 	"k8s.io/apimachinery/pkg/api/resource" | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/apimachinery/pkg/labels" | ||||
| 	"k8s.io/apimachinery/pkg/runtime" | ||||
| @@ -56,6 +57,7 @@ import ( | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/cri/streaming/portforward" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/cri/streaming/remotecommand" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/prober/results" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/status" | ||||
| 	kubetypes "k8s.io/kubernetes/pkg/kubelet/types" | ||||
| ) | ||||
|  | ||||
| @@ -3861,3 +3863,219 @@ func TestConvertToAPIContainerStatusesDataRace(t *testing.T) { | ||||
| 		}() | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestConvertToAPIContainerStatusesForResources(t *testing.T) { | ||||
| 	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)() | ||||
| 	nowTime := time.Now() | ||||
| 	testContainerName := "ctr0" | ||||
| 	testContainerID := kubecontainer.ContainerID{Type: "test", ID: testContainerName} | ||||
| 	testContainer := v1.Container{ | ||||
| 		Name:  testContainerName, | ||||
| 		Image: "img", | ||||
| 	} | ||||
| 	testContainerStatus := v1.ContainerStatus{ | ||||
| 		Name: testContainerName, | ||||
| 	} | ||||
| 	testPod := &v1.Pod{ | ||||
| 		ObjectMeta: metav1.ObjectMeta{ | ||||
| 			UID:       "123456", | ||||
| 			Name:      "foo", | ||||
| 			Namespace: "bar", | ||||
| 		}, | ||||
| 		Spec: v1.PodSpec{ | ||||
| 			Containers: []v1.Container{testContainer}, | ||||
| 		}, | ||||
| 		Status: v1.PodStatus{ | ||||
| 			ContainerStatuses: []v1.ContainerStatus{testContainerStatus}, | ||||
| 		}, | ||||
| 	} | ||||
| 	testKubeContainerStatus := kubecontainer.Status{ | ||||
| 		Name:      testContainerName, | ||||
| 		ID:        testContainerID, | ||||
| 		Image:     "img", | ||||
| 		ImageID:   "img1234", | ||||
| 		State:     kubecontainer.ContainerStateRunning, | ||||
| 		StartedAt: nowTime, | ||||
| 	} | ||||
| 	testPodStatus := &kubecontainer.PodStatus{ | ||||
| 		ID:                testPod.UID, | ||||
| 		Name:              testPod.Name, | ||||
| 		Namespace:         testPod.Namespace, | ||||
| 		ContainerStatuses: []*kubecontainer.Status{&testKubeContainerStatus}, | ||||
| 	} | ||||
| 	CPU1AndMem1G := v1.ResourceList{v1.ResourceCPU: resource.MustParse("1"), v1.ResourceMemory: resource.MustParse("1Gi")} | ||||
| 	CPU2AndMem2G := v1.ResourceList{v1.ResourceCPU: resource.MustParse("2"), v1.ResourceMemory: resource.MustParse("2Gi")} | ||||
| 	CPU1AndMem1GAndStorage2G := CPU1AndMem1G.DeepCopy() | ||||
| 	CPU1AndMem1GAndStorage2G[v1.ResourceEphemeralStorage] = resource.MustParse("2Gi") | ||||
| 	CPU2AndMem2GAndStorage2G := CPU2AndMem2G.DeepCopy() | ||||
| 	CPU2AndMem2GAndStorage2G[v1.ResourceEphemeralStorage] = resource.MustParse("2Gi") | ||||
|  | ||||
| 	testKubelet := newTestKubelet(t, false) | ||||
| 	defer testKubelet.Cleanup() | ||||
| 	kubelet := testKubelet.kubelet | ||||
| 	kubelet.statusManager = status.NewFakeManager() | ||||
|  | ||||
| 	idx := 0 | ||||
| 	for tdesc, tc := range map[string]struct { | ||||
| 		Resources []v1.ResourceRequirements | ||||
| 		OldStatus []v1.ContainerStatus | ||||
| 		Expected  []v1.ContainerStatus | ||||
| 	}{ | ||||
| 		"GuaranteedQoSPod with CPU and memory CRI status": { | ||||
| 			Resources: []v1.ResourceRequirements{{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G}}, | ||||
| 			OldStatus: []v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					Name:      testContainerName, | ||||
| 					Image:     "img", | ||||
| 					ImageID:   "img1234", | ||||
| 					State:     v1.ContainerState{Running: &v1.ContainerStateRunning{}}, | ||||
| 					Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			Expected: []v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					Name:               testContainerName, | ||||
| 					ContainerID:        testContainerID.String(), | ||||
| 					Image:              "img", | ||||
| 					ImageID:            "img1234", | ||||
| 					State:              v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}}, | ||||
| 					ResourcesAllocated: CPU1AndMem1G, | ||||
| 					Resources:          &v1.ResourceRequirements{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G}, | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		"BurstableQoSPod with CPU and memory CRI status": { | ||||
| 			Resources: []v1.ResourceRequirements{{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G}}, | ||||
| 			OldStatus: []v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					Name:      testContainerName, | ||||
| 					Image:     "img", | ||||
| 					ImageID:   "img1234", | ||||
| 					State:     v1.ContainerState{Running: &v1.ContainerStateRunning{}}, | ||||
| 					Resources: &v1.ResourceRequirements{Limits: CPU2AndMem2G, Requests: CPU1AndMem1G}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			Expected: []v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					Name:               testContainerName, | ||||
| 					ContainerID:        testContainerID.String(), | ||||
| 					Image:              "img", | ||||
| 					ImageID:            "img1234", | ||||
| 					State:              v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}}, | ||||
| 					ResourcesAllocated: CPU1AndMem1G, | ||||
| 					Resources:          &v1.ResourceRequirements{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G}, | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		"GuaranteedQoSPod with CPU and memory CRI status, with ephemeral storage": { | ||||
| 			Resources: []v1.ResourceRequirements{{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G}}, | ||||
| 			OldStatus: []v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					Name:      testContainerName, | ||||
| 					Image:     "img", | ||||
| 					ImageID:   "img1234", | ||||
| 					State:     v1.ContainerState{Running: &v1.ContainerStateRunning{}}, | ||||
| 					Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			Expected: []v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					Name:               testContainerName, | ||||
| 					ContainerID:        testContainerID.String(), | ||||
| 					Image:              "img", | ||||
| 					ImageID:            "img1234", | ||||
| 					State:              v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}}, | ||||
| 					ResourcesAllocated: CPU1AndMem1GAndStorage2G, | ||||
| 					Resources:          &v1.ResourceRequirements{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G}, | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		"BurstableQoSPod with CPU and memory CRI status, with ephemeral storage": { | ||||
| 			Resources: []v1.ResourceRequirements{{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G}}, | ||||
| 			OldStatus: []v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					Name:      testContainerName, | ||||
| 					Image:     "img", | ||||
| 					ImageID:   "img1234", | ||||
| 					State:     v1.ContainerState{Running: &v1.ContainerStateRunning{}}, | ||||
| 					Resources: &v1.ResourceRequirements{Limits: CPU2AndMem2GAndStorage2G, Requests: CPU2AndMem2GAndStorage2G}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			Expected: []v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					Name:               testContainerName, | ||||
| 					ContainerID:        testContainerID.String(), | ||||
| 					Image:              "img", | ||||
| 					ImageID:            "img1234", | ||||
| 					State:              v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}}, | ||||
| 					ResourcesAllocated: CPU1AndMem1GAndStorage2G, | ||||
| 					Resources:          &v1.ResourceRequirements{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G}, | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		"BurstableQoSPod with CPU and memory CRI status, with ephemeral storage, nil resources in OldStatus": { | ||||
| 			Resources: []v1.ResourceRequirements{{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G}}, | ||||
| 			OldStatus: []v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					Name:    testContainerName, | ||||
| 					Image:   "img", | ||||
| 					ImageID: "img1234", | ||||
| 					State:   v1.ContainerState{Running: &v1.ContainerStateRunning{}}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			Expected: []v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					Name:               testContainerName, | ||||
| 					ContainerID:        testContainerID.String(), | ||||
| 					Image:              "img", | ||||
| 					ImageID:            "img1234", | ||||
| 					State:              v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}}, | ||||
| 					ResourcesAllocated: CPU1AndMem1GAndStorage2G, | ||||
| 					Resources:          &v1.ResourceRequirements{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G}, | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		"BestEffortQoSPod": { | ||||
| 			OldStatus: []v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					Name:      testContainerName, | ||||
| 					Image:     "img", | ||||
| 					ImageID:   "img1234", | ||||
| 					State:     v1.ContainerState{Running: &v1.ContainerStateRunning{}}, | ||||
| 					Resources: &v1.ResourceRequirements{}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			Expected: []v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					Name:        testContainerName, | ||||
| 					ContainerID: testContainerID.String(), | ||||
| 					Image:       "img", | ||||
| 					ImageID:     "img1234", | ||||
| 					State:       v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}}, | ||||
| 					Resources:   &v1.ResourceRequirements{}, | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} { | ||||
| 		tPod := testPod.DeepCopy() | ||||
| 		tPod.Name = fmt.Sprintf("%s-%d", testPod.Name, idx) | ||||
| 		for i := range tPod.Spec.Containers { | ||||
| 			if tc.Resources != nil { | ||||
| 				tPod.Spec.Containers[i].Resources = tc.Resources[i] | ||||
| 			} | ||||
| 			kubelet.statusManager.SetPodAllocation(tPod) | ||||
| 			if tc.Resources != nil { | ||||
| 				tPod.Status.ContainerStatuses[i].ResourcesAllocated = tc.Resources[i].Requests | ||||
| 				testPodStatus.ContainerStatuses[i].Resources = &kubecontainer.ContainerResources{ | ||||
| 					MemoryLimit: tc.Resources[i].Limits.Memory(), | ||||
| 					CPULimit:    tc.Resources[i].Limits.Cpu(), | ||||
| 					CPURequest:  tc.Resources[i].Requests.Cpu(), | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		t.Logf("TestCase: %q", tdesc) | ||||
| 		cStatuses := kubelet.convertToAPIContainerStatuses(tPod, testPodStatus, tc.OldStatus, tPod.Spec.Containers, false, false) | ||||
| 		assert.Equal(t, tc.Expected, cStatuses) | ||||
| 	} | ||||
| } | ||||
|   | ||||
| @@ -48,12 +48,15 @@ import ( | ||||
| 	"k8s.io/apimachinery/pkg/types" | ||||
| 	"k8s.io/apimachinery/pkg/util/sets" | ||||
| 	"k8s.io/apimachinery/pkg/util/wait" | ||||
| 	utilfeature "k8s.io/apiserver/pkg/util/feature" | ||||
| 	"k8s.io/client-go/kubernetes/fake" | ||||
| 	"k8s.io/client-go/tools/record" | ||||
| 	"k8s.io/client-go/util/flowcontrol" | ||||
| 	featuregatetesting "k8s.io/component-base/featuregate/testing" | ||||
| 	internalapi "k8s.io/cri-api/pkg/apis" | ||||
| 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" | ||||
| 	"k8s.io/klog/v2/ktesting" | ||||
| 	"k8s.io/kubernetes/pkg/features" | ||||
| 	kubeletconfiginternal "k8s.io/kubernetes/pkg/kubelet/apis/config" | ||||
| 	cadvisortest "k8s.io/kubernetes/pkg/kubelet/cadvisor/testing" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/cm" | ||||
| @@ -260,7 +263,7 @@ func newTestKubeletWithImageList( | ||||
| 	kubelet.configMapManager = configMapManager | ||||
| 	kubelet.podManager = kubepod.NewBasicPodManager(fakeMirrorClient) | ||||
| 	podStartupLatencyTracker := kubeletutil.NewPodStartupLatencyTracker() | ||||
| 	kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker) | ||||
| 	kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, kubelet.getRootDir()) | ||||
|  | ||||
| 	kubelet.containerRuntime = fakeRuntime | ||||
| 	kubelet.runtimeCache = containertest.NewFakeRuntimeCache(kubelet.containerRuntime) | ||||
| @@ -2436,6 +2439,162 @@ func TestHandlePodAdditionsInvokesPodAdmitHandlers(t *testing.T) { | ||||
| 	checkPodStatus(t, kl, podToAdmit, v1.PodPending) | ||||
| } | ||||
|  | ||||
| func TestHandlePodResourcesResize(t *testing.T) { | ||||
| 	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)() | ||||
| 	testKubelet := newTestKubelet(t, false) | ||||
| 	defer testKubelet.Cleanup() | ||||
| 	kubelet := testKubelet.kubelet | ||||
| 	kubelet.statusManager = status.NewFakeManager() | ||||
|  | ||||
| 	cpu500m := resource.MustParse("500m") | ||||
| 	cpu1000m := resource.MustParse("1") | ||||
| 	cpu1500m := resource.MustParse("1500m") | ||||
| 	cpu2500m := resource.MustParse("2500m") | ||||
| 	cpu5000m := resource.MustParse("5000m") | ||||
| 	mem500M := resource.MustParse("500Mi") | ||||
| 	mem1000M := resource.MustParse("1Gi") | ||||
| 	mem1500M := resource.MustParse("1500Mi") | ||||
| 	mem2500M := resource.MustParse("2500Mi") | ||||
| 	mem4500M := resource.MustParse("4500Mi") | ||||
|  | ||||
| 	nodes := []*v1.Node{ | ||||
| 		{ | ||||
| 			ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname}, | ||||
| 			Status: v1.NodeStatus{ | ||||
| 				Capacity: v1.ResourceList{ | ||||
| 					v1.ResourceCPU:    resource.MustParse("8"), | ||||
| 					v1.ResourceMemory: resource.MustParse("8Gi"), | ||||
| 				}, | ||||
| 				Allocatable: v1.ResourceList{ | ||||
| 					v1.ResourceCPU:    resource.MustParse("4"), | ||||
| 					v1.ResourceMemory: resource.MustParse("4Gi"), | ||||
| 					v1.ResourcePods:   *resource.NewQuantity(40, resource.DecimalSI), | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
| 	kubelet.nodeLister = testNodeLister{nodes: nodes} | ||||
|  | ||||
| 	testPod1 := &v1.Pod{ | ||||
| 		ObjectMeta: metav1.ObjectMeta{ | ||||
| 			UID:       "1111", | ||||
| 			Name:      "pod1", | ||||
| 			Namespace: "ns1", | ||||
| 		}, | ||||
| 		Spec: v1.PodSpec{ | ||||
| 			Containers: []v1.Container{ | ||||
| 				{ | ||||
| 					Name:  "c1", | ||||
| 					Image: "i1", | ||||
| 					Resources: v1.ResourceRequirements{ | ||||
| 						Requests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, | ||||
| 					}, | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		Status: v1.PodStatus{ | ||||
| 			Phase: v1.PodRunning, | ||||
| 			ContainerStatuses: []v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					Name:               "c1", | ||||
| 					ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, | ||||
| 					Resources:          &v1.ResourceRequirements{}, | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
| 	testPod2 := testPod1.DeepCopy() | ||||
| 	testPod2.UID = "2222" | ||||
| 	testPod2.Name = "pod2" | ||||
| 	testPod2.Namespace = "ns2" | ||||
| 	testPod3 := testPod1.DeepCopy() | ||||
| 	testPod3.UID = "3333" | ||||
| 	testPod3.Name = "pod3" | ||||
| 	testPod3.Namespace = "ns2" | ||||
|  | ||||
| 	testKubelet.fakeKubeClient = fake.NewSimpleClientset(testPod1, testPod2, testPod3) | ||||
| 	kubelet.kubeClient = testKubelet.fakeKubeClient | ||||
| 	defer testKubelet.fakeKubeClient.ClearActions() | ||||
| 	kubelet.podManager.AddPod(testPod1) | ||||
| 	kubelet.podManager.AddPod(testPod2) | ||||
| 	kubelet.podManager.AddPod(testPod3) | ||||
| 	kubelet.podWorkers.(*fakePodWorkers).running = map[types.UID]bool{ | ||||
| 		testPod1.UID: true, | ||||
| 		testPod2.UID: true, | ||||
| 		testPod3.UID: true, | ||||
| 	} | ||||
| 	defer kubelet.podManager.DeletePod(testPod3) | ||||
| 	defer kubelet.podManager.DeletePod(testPod2) | ||||
| 	defer kubelet.podManager.DeletePod(testPod1) | ||||
|  | ||||
| 	tests := []struct { | ||||
| 		name                string | ||||
| 		pod                 *v1.Pod | ||||
| 		newRequests         v1.ResourceList | ||||
| 		expectedAllocations v1.ResourceList | ||||
| 		expectedResize      v1.PodResizeStatus | ||||
| 	}{ | ||||
| 		{ | ||||
| 			name:                "Request CPU and memory decrease - expect InProgress", | ||||
| 			pod:                 testPod2, | ||||
| 			newRequests:         v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M}, | ||||
| 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M}, | ||||
| 			expectedResize:      v1.PodResizeStatusInProgress, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:                "Request CPU increase, memory decrease - expect InProgress", | ||||
| 			pod:                 testPod2, | ||||
| 			newRequests:         v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem500M}, | ||||
| 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem500M}, | ||||
| 			expectedResize:      v1.PodResizeStatusInProgress, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:                "Request CPU decrease, memory increase - expect InProgress", | ||||
| 			pod:                 testPod2, | ||||
| 			newRequests:         v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem1500M}, | ||||
| 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem1500M}, | ||||
| 			expectedResize:      v1.PodResizeStatusInProgress, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:                "Request CPU and memory increase beyond current capacity - expect Deferred", | ||||
| 			pod:                 testPod2, | ||||
| 			newRequests:         v1.ResourceList{v1.ResourceCPU: cpu2500m, v1.ResourceMemory: mem2500M}, | ||||
| 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, | ||||
| 			expectedResize:      v1.PodResizeStatusDeferred, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:                "Request CPU decrease and memory increase beyond current capacity - expect Deferred", | ||||
| 			pod:                 testPod2, | ||||
| 			newRequests:         v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem2500M}, | ||||
| 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, | ||||
| 			expectedResize:      v1.PodResizeStatusDeferred, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:                "Request memory increase beyond node capacity - expect Infeasible", | ||||
| 			pod:                 testPod2, | ||||
| 			newRequests:         v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem4500M}, | ||||
| 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, | ||||
| 			expectedResize:      v1.PodResizeStatusInfeasible, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:                "Request CPU increase beyond node capacity - expect Infeasible", | ||||
| 			pod:                 testPod2, | ||||
| 			newRequests:         v1.ResourceList{v1.ResourceCPU: cpu5000m, v1.ResourceMemory: mem1000M}, | ||||
| 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}, | ||||
| 			expectedResize:      v1.PodResizeStatusInfeasible, | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	for _, tt := range tests { | ||||
| 		tt.pod.Spec.Containers[0].Resources.Requests = tt.newRequests | ||||
| 		tt.pod.Status.ContainerStatuses[0].ResourcesAllocated = v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M} | ||||
| 		kubelet.handlePodResourcesResize(tt.pod) | ||||
| 		assert.Equal(t, tt.expectedAllocations, tt.pod.Status.ContainerStatuses[0].ResourcesAllocated, tt.name) | ||||
| 		assert.Equal(t, tt.expectedResize, tt.pod.Status.Resize, tt.name) | ||||
| 		testKubelet.fakeKubeClient.ClearActions() | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // testPodSyncLoopHandler is a lifecycle.PodSyncLoopHandler that is used for testing. | ||||
| type testPodSyncLoopHandler struct { | ||||
| 	// list of pods to sync | ||||
|   | ||||
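The expectations in TestHandlePodResourcesResize above encode the three resize outcomes: new requests that fit what the node can currently accommodate are admitted and reported as InProgress, requests that exceed what is currently free (but not the node's allocatable) are Deferred, and requests that exceed node allocatable outright are Infeasible. The sketch below is only a rough illustration of that classification; the function name and the way "currently free" capacity is obtained are hypothetical, not the kubelet's actual handlePodResourcesResize logic.

```go
// Rough sketch of the classification exercised by the test table above
// (assumes v1 "k8s.io/api/core/v1" is imported).
func classifyResize(newRequests, nodeAllocatable, currentlyFree v1.ResourceList) v1.PodResizeStatus {
	exceeds := func(available v1.ResourceList) bool {
		for name, want := range newRequests {
			have, ok := available[name]
			if !ok || want.Cmp(have) > 0 {
				return true
			}
		}
		return false
	}
	switch {
	case exceeds(nodeAllocatable):
		return v1.PodResizeStatusInfeasible // can never fit on this node
	case exceeds(currentlyFree):
		return v1.PodResizeStatusDeferred // may fit later as other pods shrink or terminate
	default:
		return v1.PodResizeStatusInProgress // allocation accepted, actuation follows
	}
}
```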
| @@ -94,12 +94,13 @@ func (m *kubeGenericRuntimeManager) toKubeContainer(c *runtimeapi.Container) (*k | ||||
|  | ||||
| 	annotatedInfo := getContainerInfoFromAnnotations(c.Annotations) | ||||
| 	return &kubecontainer.Container{ | ||||
| 		ID:      kubecontainer.ContainerID{Type: m.runtimeName, ID: c.Id}, | ||||
| 		Name:    c.GetMetadata().GetName(), | ||||
| 		ImageID: c.ImageRef, | ||||
| 		Image:   c.Image.Image, | ||||
| 		Hash:    annotatedInfo.Hash, | ||||
| 		State:   toKubeContainerState(c.State), | ||||
| 		ID:                   kubecontainer.ContainerID{Type: m.runtimeName, ID: c.Id}, | ||||
| 		Name:                 c.GetMetadata().GetName(), | ||||
| 		ImageID:              c.ImageRef, | ||||
| 		Image:                c.Image.Image, | ||||
| 		Hash:                 annotatedInfo.Hash, | ||||
| 		HashWithoutResources: annotatedInfo.HashWithoutResources, | ||||
| 		State:                toKubeContainerState(c.State), | ||||
| 	}, nil | ||||
| } | ||||
|  | ||||
|   | ||||
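toKubeContainer now also carries HashWithoutResources, read from the container annotations next to the existing Hash, presumably so that changes to the Resources field alone do not alter the hash and register as a spec change. The snippet below is only a sketch of that idea under the assumption that the hash is taken over the container spec with Resources cleared; the real annotation-producing helper lives elsewhere and may hash differently.

```go
import (
	"fmt"
	"hash/fnv"

	v1 "k8s.io/api/core/v1"
)

// Sketch only (not the kubelet's actual helper): hash a copy of the container
// spec with Resources cleared, so resource-only changes keep the hash stable.
func hashContainerWithoutResources(c *v1.Container) uint64 {
	clone := c.DeepCopy()
	clone.Resources = v1.ResourceRequirements{}
	h := fnv.New64a()
	fmt.Fprintf(h, "%#v", clone) // crude stand-in for the kubelet's deep-hash routine
	return h.Sum64()
}
```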
| @@ -19,6 +19,11 @@ limitations under the License. | ||||
|  | ||||
| package kuberuntime | ||||
|  | ||||
| import ( | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/cm" | ||||
| 	"math" | ||||
| ) | ||||
|  | ||||
| const ( | ||||
| 	milliCPUToCPU = 1000 | ||||
|  | ||||
| @@ -53,3 +58,22 @@ func milliCPUToQuota(milliCPU int64, period int64) (quota int64) { | ||||
|  | ||||
| 	return | ||||
| } | ||||
|  | ||||
| // sharesToMilliCPU converts CpuShares (cpu.shares) to milli-CPU value | ||||
| // TODO(vinaykul,InPlacePodVerticalScaling): Address issue that sets min req/limit to 2m/10m before beta | ||||
| // See: https://github.com/kubernetes/kubernetes/pull/102884#discussion_r662552642 | ||||
| func sharesToMilliCPU(shares int64) int64 { | ||||
| 	milliCPU := int64(0) | ||||
| 	if shares >= int64(cm.MinShares) { | ||||
| 		milliCPU = int64(math.Ceil(float64(shares*milliCPUToCPU) / float64(cm.SharesPerCPU))) | ||||
| 	} | ||||
| 	return milliCPU | ||||
| } | ||||
|  | ||||
| // quotaToMilliCPU converts cpu.cfs_quota_us and cpu.cfs_period_us to milli-CPU value | ||||
| func quotaToMilliCPU(quota int64, period int64) int64 { | ||||
| 	if quota == -1 { | ||||
| 		return int64(0) | ||||
| 	} | ||||
| 	return (quota * milliCPUToCPU) / period | ||||
| } | ||||
|   | ||||
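Both helpers invert conversions the kubelet already performs when writing cgroup values, so a CRI-reported configuration can be compared against milli-CPU quantities from the pod spec. A quick arithmetic check, using the defaults these helpers assume (cm.SharesPerCPU = 1024, cm.MinShares = 2, a 100000us CFS period); the wrapper function name is illustrative only:

```go
// Worked examples of the two conversions defined above.
func exampleCPUConversions() (int64, int64, int64) {
	req := sharesToMilliCPU(256)            // ceil(256*1000/1024)  -> 250 milli-CPU
	oneCPU := sharesToMilliCPU(1024)        // ceil(1024*1000/1024) -> 1000 milli-CPU (1 CPU)
	limit := quotaToMilliCPU(25000, 100000) // 25000*1000/100000    -> 250 milli-CPU
	return req, oneCPU, limit
}
```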
| @@ -28,6 +28,7 @@ import ( | ||||
| 	featuregatetesting "k8s.io/component-base/featuregate/testing" | ||||
| 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" | ||||
| 	"k8s.io/kubernetes/pkg/features" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/cm" | ||||
| 	utilpointer "k8s.io/utils/pointer" | ||||
| ) | ||||
|  | ||||
| @@ -670,3 +671,81 @@ func TestGetSeccompProfileDefaultSeccomp(t *testing.T) { | ||||
| func getLocal(v string) *string { | ||||
| 	return &v | ||||
| } | ||||
|  | ||||
| func TestSharesToMilliCPU(t *testing.T) { | ||||
| 	knownMilliCPUToShares := map[int64]int64{ | ||||
| 		0:    2, | ||||
| 		1:    2, | ||||
| 		2:    2, | ||||
| 		3:    3, | ||||
| 		4:    4, | ||||
| 		32:   32, | ||||
| 		64:   65, | ||||
| 		100:  102, | ||||
| 		250:  256, | ||||
| 		500:  512, | ||||
| 		1000: 1024, | ||||
| 		1500: 1536, | ||||
| 		2000: 2048, | ||||
| 	} | ||||
|  | ||||
| 	t.Run("sharesToMilliCPUTest", func(t *testing.T) { | ||||
| 		var testMilliCPU int64 | ||||
| 		for testMilliCPU = 0; testMilliCPU <= 2000; testMilliCPU++ { | ||||
| 			shares := int64(cm.MilliCPUToShares(testMilliCPU)) | ||||
| 			if expectedShares, found := knownMilliCPUToShares[testMilliCPU]; found { | ||||
| 				if shares != expectedShares { | ||||
| 					t.Errorf("Test MilliCPUToShares: Input milliCPU %v, expected shares %v, but got %v", testMilliCPU, expectedShares, shares) | ||||
| 				} | ||||
| 			} | ||||
| 			expectedMilliCPU := testMilliCPU | ||||
| 			if testMilliCPU < 2 { | ||||
| 				expectedMilliCPU = 2 | ||||
| 			} | ||||
| 			milliCPU := sharesToMilliCPU(shares) | ||||
| 			if milliCPU != expectedMilliCPU { | ||||
| 				t.Errorf("Test sharesToMilliCPU: Input shares %v, expected milliCPU %v, but got %v", shares, expectedMilliCPU, milliCPU) | ||||
| 			} | ||||
| 		} | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| func TestQuotaToMilliCPU(t *testing.T) { | ||||
| 	for _, tc := range []struct { | ||||
| 		name     string | ||||
| 		quota    int64 | ||||
| 		period   int64 | ||||
| 		expected int64 | ||||
| 	}{ | ||||
| 		{ | ||||
| 			name:     "50m", | ||||
| 			quota:    int64(5000), | ||||
| 			period:   int64(100000), | ||||
| 			expected: int64(50), | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:     "750m", | ||||
| 			quota:    int64(75000), | ||||
| 			period:   int64(100000), | ||||
| 			expected: int64(750), | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:     "1000m", | ||||
| 			quota:    int64(100000), | ||||
| 			period:   int64(100000), | ||||
| 			expected: int64(1000), | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:     "1500m", | ||||
| 			quota:    int64(150000), | ||||
| 			period:   int64(100000), | ||||
| 			expected: int64(1500), | ||||
| 		}} { | ||||
| 		t.Run(tc.name, func(t *testing.T) { | ||||
| 			milliCPU := quotaToMilliCPU(tc.quota, tc.period) | ||||
| 			if milliCPU != tc.expected { | ||||
| 				t.Errorf("Test %s: Input quota %v and period %v, expected milliCPU %v, but got %v", tc.name, tc.quota, tc.period, tc.expected, milliCPU) | ||||
| 			} | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
|   | ||||
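The table above covers the common quota/period pairs; two edge cases the helpers also handle, though not exercised here, are an unlimited CFS quota and cpu.shares below the cgroup minimum. A small additional test one could add, shown only as a sketch (the test name is hypothetical and not part of this change):

```go
// Sketch of additional edge cases: a quota of -1 means "no CFS limit" and maps
// to 0 milli-CPU, and shares below cm.MinShares also map to 0 milli-CPU.
func TestCPUConversionEdgeCases(t *testing.T) {
	if got := quotaToMilliCPU(-1, 100000); got != 0 {
		t.Errorf("unlimited quota: expected 0, got %v", got)
	}
	if got := sharesToMilliCPU(1); got != 0 {
		t.Errorf("shares below MinShares: expected 0, got %v", got)
	}
}
```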
| @@ -46,7 +46,9 @@ import ( | ||||
| 	kubetypes "k8s.io/apimachinery/pkg/types" | ||||
| 	utilruntime "k8s.io/apimachinery/pkg/util/runtime" | ||||
| 	"k8s.io/apimachinery/pkg/util/sets" | ||||
| 	utilfeature "k8s.io/apiserver/pkg/util/feature" | ||||
| 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" | ||||
| 	"k8s.io/kubernetes/pkg/features" | ||||
| 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/cri/remote" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/events" | ||||
| @@ -359,6 +361,18 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(ctx context.Context, | ||||
| 	return config, cleanupAction, nil | ||||
| } | ||||
|  | ||||
| func (m *kubeGenericRuntimeManager) updateContainerResources(pod *v1.Pod, container *v1.Container, containerID kubecontainer.ContainerID) error { | ||||
| 	containerResources := m.generateContainerResources(pod, container) | ||||
| 	if containerResources == nil { | ||||
| 		return fmt.Errorf("container %q updateContainerResources failed: cannot generate resources config", containerID.String()) | ||||
| 	} | ||||
| 	err := m.runtimeService.UpdateContainerResources(containerID.ID, containerResources) | ||||
| 	if err != nil { | ||||
| 		klog.ErrorS(err, "UpdateContainerResources failed", "container", containerID.String()) | ||||
| 	} | ||||
| 	return err | ||||
| } | ||||
|  | ||||
| // makeDevices generates container devices for kubelet runtime v1. | ||||
| func makeDevices(opts *kubecontainer.RunContainerOptions) []*runtimeapi.Device { | ||||
| 	devices := make([]*runtimeapi.Device, len(opts.Devices)) | ||||
| @@ -557,18 +571,25 @@ func (m *kubeGenericRuntimeManager) getPodContainerStatuses(ctx context.Context, | ||||
| func toKubeContainerStatus(status *runtimeapi.ContainerStatus, runtimeName string) *kubecontainer.Status { | ||||
| 	annotatedInfo := getContainerInfoFromAnnotations(status.Annotations) | ||||
| 	labeledInfo := getContainerInfoFromLabels(status.Labels) | ||||
| 	var cStatusResources *kubecontainer.ContainerResources | ||||
| 	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { | ||||
| 		// If runtime reports cpu & memory resources info, add it to container status | ||||
| 		cStatusResources = toKubeContainerResources(status.Resources) | ||||
| 	} | ||||
| 	cStatus := &kubecontainer.Status{ | ||||
| 		ID: kubecontainer.ContainerID{ | ||||
| 			Type: runtimeName, | ||||
| 			ID:   status.Id, | ||||
| 		}, | ||||
| 		Name:         labeledInfo.ContainerName, | ||||
| 		Image:        status.Image.Image, | ||||
| 		ImageID:      status.ImageRef, | ||||
| 		Hash:         annotatedInfo.Hash, | ||||
| 		RestartCount: annotatedInfo.RestartCount, | ||||
| 		State:        toKubeContainerState(status.State), | ||||
| 		CreatedAt:    time.Unix(0, status.CreatedAt), | ||||
| 		Name:                 labeledInfo.ContainerName, | ||||
| 		Image:                status.Image.Image, | ||||
| 		ImageID:              status.ImageRef, | ||||
| 		Hash:                 annotatedInfo.Hash, | ||||
| 		HashWithoutResources: annotatedInfo.HashWithoutResources, | ||||
| 		RestartCount:         annotatedInfo.RestartCount, | ||||
| 		State:                toKubeContainerState(status.State), | ||||
| 		CreatedAt:            time.Unix(0, status.CreatedAt), | ||||
| 		Resources:            cStatusResources, | ||||
| 	} | ||||
|  | ||||
| 	if status.State != runtimeapi.ContainerState_CONTAINER_CREATED { | ||||
|   | ||||
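The two additions to this file work as a pair: toKubeContainerStatus now surfaces the runtime-reported resources when the feature gate is enabled, and updateContainerResources pushes a freshly generated resources config to the runtime over CRI. A minimal usage sketch; the wrapper name is illustrative, and the call sequence mirrors TestUpdateContainerResources later in this change:

```go
// Resolve the running container's ID from the runtime status, then push the
// resources derived from the pod spec to the runtime via CRI.
func resizeFirstContainer(m *kubeGenericRuntimeManager, pod *v1.Pod) error {
	cStatuses, err := m.getPodContainerStatuses(pod.UID, pod.Name, pod.Namespace)
	if err != nil {
		return err
	}
	return m.updateContainerResources(pod, &pod.Spec.Containers[0], cStatuses[0].ID)
}
```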
| @@ -60,7 +60,7 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	lc := &runtimeapi.LinuxContainerConfig{ | ||||
| 		Resources:       &runtimeapi.LinuxContainerResources{}, | ||||
| 		Resources:       m.generateLinuxContainerResources(pod, container, enforceMemoryQoS), | ||||
| 		SecurityContext: sc, | ||||
| 	} | ||||
|  | ||||
| @@ -69,17 +69,22 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C | ||||
| 		lc.SecurityContext.NamespaceOptions.TargetId = nsTarget.ID | ||||
| 	} | ||||
|  | ||||
| 	return lc, nil | ||||
| } | ||||
|  | ||||
| // generateLinuxContainerResources generates linux container resources config for runtime | ||||
| func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod, container *v1.Container, enforceMemoryQoS bool) *runtimeapi.LinuxContainerResources { | ||||
| 	// set linux container resources | ||||
| 	var cpuRequest *resource.Quantity | ||||
| 	if _, cpuRequestExists := container.Resources.Requests[v1.ResourceCPU]; cpuRequestExists { | ||||
| 		cpuRequest = container.Resources.Requests.Cpu() | ||||
| 	} | ||||
| 	lc.Resources = m.calculateLinuxResources(cpuRequest, container.Resources.Limits.Cpu(), container.Resources.Limits.Memory()) | ||||
| 	lcr := m.calculateLinuxResources(cpuRequest, container.Resources.Limits.Cpu(), container.Resources.Limits.Memory()) | ||||
|  | ||||
| 	lc.Resources.OomScoreAdj = int64(qos.GetContainerOOMScoreAdjust(pod, container, | ||||
| 	lcr.OomScoreAdj = int64(qos.GetContainerOOMScoreAdjust(pod, container, | ||||
| 		int64(m.machineInfo.MemoryCapacity))) | ||||
|  | ||||
| 	lc.Resources.HugepageLimits = GetHugepageLimitsFromResources(container.Resources) | ||||
| 	lcr.HugepageLimits = GetHugepageLimitsFromResources(container.Resources) | ||||
|  | ||||
| 	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) { | ||||
| 		// NOTE(ehashman): Behaviour is defined in the opencontainers runtime spec: | ||||
| @@ -87,14 +92,14 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C | ||||
| 		switch m.memorySwapBehavior { | ||||
| 		case kubelettypes.UnlimitedSwap: | ||||
| 			// -1 = unlimited swap | ||||
| 			lc.Resources.MemorySwapLimitInBytes = -1 | ||||
| 			lcr.MemorySwapLimitInBytes = -1 | ||||
| 		case kubelettypes.LimitedSwap: | ||||
| 			fallthrough | ||||
| 		default: | ||||
| 			// memorySwapLimit = total permitted memory+swap; if equal to memory limit, => 0 swap above memory limit | ||||
| 			// Some swapping is still possible. | ||||
| 			// Note that if memory limit is 0, memory swap limit is ignored. | ||||
| 			lc.Resources.MemorySwapLimitInBytes = lc.Resources.MemoryLimitInBytes | ||||
| 			lcr.MemorySwapLimitInBytes = lcr.MemoryLimitInBytes | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| @@ -125,18 +130,31 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C | ||||
| 			unified[cm.MemoryHigh] = strconv.FormatInt(memoryHigh, 10) | ||||
| 		} | ||||
| 		if len(unified) > 0 { | ||||
| 			if lc.Resources.Unified == nil { | ||||
| 				lc.Resources.Unified = unified | ||||
| 			if lcr.Unified == nil { | ||||
| 				lcr.Unified = unified | ||||
| 			} else { | ||||
| 				for k, v := range unified { | ||||
| 					lc.Resources.Unified[k] = v | ||||
| 					lcr.Unified[k] = v | ||||
| 				} | ||||
| 			} | ||||
| 			klog.V(4).InfoS("MemoryQoS config for container", "pod", klog.KObj(pod), "containerName", container.Name, "unified", unified) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return lc, nil | ||||
| 	return lcr | ||||
| } | ||||
|  | ||||
| // generateContainerResources generates platform specific (linux) container resources config for runtime | ||||
| func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, container *v1.Container) *runtimeapi.ContainerResources { | ||||
| 	enforceMemoryQoS := false | ||||
| 	// Set memory.min and memory.high if MemoryQoS enabled with cgroups v2 | ||||
| 	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryQoS) && | ||||
| 		libcontainercgroups.IsCgroup2UnifiedMode() { | ||||
| 		enforceMemoryQoS = true | ||||
| 	} | ||||
| 	return &runtimeapi.ContainerResources{ | ||||
| 		Linux: m.generateLinuxContainerResources(pod, container, enforceMemoryQoS), | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // calculateLinuxResources will create the linuxContainerResources type based on the provided CPU and memory resource requests, limits | ||||
| @@ -218,3 +236,34 @@ func GetHugepageLimitsFromResources(resources v1.ResourceRequirements) []*runtim | ||||
|  | ||||
| 	return hugepageLimits | ||||
| } | ||||
|  | ||||
| func toKubeContainerResources(statusResources *runtimeapi.ContainerResources) *kubecontainer.ContainerResources { | ||||
| 	var cStatusResources *kubecontainer.ContainerResources | ||||
| 	runtimeStatusResources := statusResources.GetLinux() | ||||
| 	if runtimeStatusResources != nil { | ||||
| 		var cpuLimit, memLimit, cpuRequest *resource.Quantity | ||||
| 		if runtimeStatusResources.CpuPeriod > 0 { | ||||
| 			milliCPU := quotaToMilliCPU(runtimeStatusResources.CpuQuota, runtimeStatusResources.CpuPeriod) | ||||
| 			if milliCPU > 0 { | ||||
| 				cpuLimit = resource.NewMilliQuantity(milliCPU, resource.DecimalSI) | ||||
| 			} | ||||
| 		} | ||||
| 		if runtimeStatusResources.CpuShares > 0 { | ||||
| 			milliCPU := sharesToMilliCPU(runtimeStatusResources.CpuShares) | ||||
| 			if milliCPU > 0 { | ||||
| 				cpuRequest = resource.NewMilliQuantity(milliCPU, resource.DecimalSI) | ||||
| 			} | ||||
| 		} | ||||
| 		if runtimeStatusResources.MemoryLimitInBytes > 0 { | ||||
| 			memLimit = resource.NewQuantity(runtimeStatusResources.MemoryLimitInBytes, resource.BinarySI) | ||||
| 		} | ||||
| 		if cpuLimit != nil || memLimit != nil || cpuRequest != nil { | ||||
| 			cStatusResources = &kubecontainer.ContainerResources{ | ||||
| 				CPULimit:    cpuLimit, | ||||
| 				CPURequest:  cpuRequest, | ||||
| 				MemoryLimit: memLimit, | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return cStatusResources | ||||
| } | ||||
|   | ||||
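toKubeContainerResources maps the CRI-reported Linux values back into kubelet-internal quantities: the CFS quota/period pair becomes the CPU limit, cpu.shares becomes the CPU request, and the memory limit is carried over in bytes. Worked through with concrete numbers (the wrapper function is illustrative only):

```go
// Worked example of the mapping performed by toKubeContainerResources above.
func exampleCRIToKubeResources() *kubecontainer.ContainerResources {
	in := &runtimeapi.ContainerResources{
		Linux: &runtimeapi.LinuxContainerResources{
			CpuQuota:           25000,     // with a 100000us period -> 250 milli-CPU limit
			CpuPeriod:          100000,
			CpuShares:          204,       // ceil(204*1000/1024) -> 200 milli-CPU request
			MemoryLimitInBytes: 524288000, // 500Mi, carried over as the memory limit
		},
	}
	// Result: CPULimit = 250m, CPURequest = 200m, MemoryLimit = 524288000 (BinarySI).
	return toKubeContainerResources(in)
}
```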
| @@ -31,6 +31,7 @@ import ( | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	"k8s.io/apimachinery/pkg/api/resource" | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/apimachinery/pkg/util/diff" | ||||
| 	utilfeature "k8s.io/apiserver/pkg/util/feature" | ||||
| 	featuregatetesting "k8s.io/component-base/featuregate/testing" | ||||
| 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" | ||||
| @@ -712,3 +713,167 @@ func TestGenerateLinuxContainerConfigSwap(t *testing.T) { | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestGenerateLinuxContainerResources(t *testing.T) { | ||||
| 	_, _, m, err := createTestRuntimeManager() | ||||
| 	assert.NoError(t, err) | ||||
| 	m.machineInfo.MemoryCapacity = 17179860387 // 16GB | ||||
|  | ||||
| 	pod := &v1.Pod{ | ||||
| 		ObjectMeta: metav1.ObjectMeta{ | ||||
| 			UID:       "12345678", | ||||
| 			Name:      "foo", | ||||
| 			Namespace: "bar", | ||||
| 		}, | ||||
| 		Spec: v1.PodSpec{ | ||||
| 			Containers: []v1.Container{ | ||||
| 				{ | ||||
| 					Name:  "c1", | ||||
| 					Image: "busybox", | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		Status: v1.PodStatus{}, | ||||
| 	} | ||||
|  | ||||
| 	for _, tc := range []struct { | ||||
| 		name      string | ||||
| 		scalingFg bool | ||||
| 		limits    v1.ResourceList | ||||
| 		requests  v1.ResourceList | ||||
| 		cStatus   []v1.ContainerStatus | ||||
| 		expected  *runtimeapi.LinuxContainerResources | ||||
| 	}{ | ||||
| 		{ | ||||
| 			"requests & limits, cpu & memory, guaranteed qos - no container status", | ||||
| 			true, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 			[]v1.ContainerStatus{}, | ||||
| 			&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 524288000, OomScoreAdj: -997}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"requests & limits, cpu & memory, burstable qos - no container status", | ||||
| 			true, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m"), v1.ResourceMemory: resource.MustParse("750Mi")}, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 			[]v1.ContainerStatus{}, | ||||
| 			&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 786432000, OomScoreAdj: 970}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"best-effort qos - no container status", | ||||
| 			true, | ||||
| 			nil, | ||||
| 			nil, | ||||
| 			[]v1.ContainerStatus{}, | ||||
| 			&runtimeapi.LinuxContainerResources{CpuShares: 2, OomScoreAdj: 1000}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"requests & limits, cpu & memory, guaranteed qos - empty resources container status", | ||||
| 			true, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 			[]v1.ContainerStatus{{Name: "c1"}}, | ||||
| 			&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 524288000, OomScoreAdj: -997}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"requests & limits, cpu & memory, burstable qos - empty resources container status", | ||||
| 			true, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m"), v1.ResourceMemory: resource.MustParse("750Mi")}, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 			[]v1.ContainerStatus{{Name: "c1"}}, | ||||
| 			&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 786432000, OomScoreAdj: 999}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"best-effort qos - empty resources container status", | ||||
| 			true, | ||||
| 			nil, | ||||
| 			nil, | ||||
| 			[]v1.ContainerStatus{{Name: "c1"}}, | ||||
| 			&runtimeapi.LinuxContainerResources{CpuShares: 2, OomScoreAdj: 1000}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"requests & limits, cpu & memory, guaranteed qos - container status with resourcesAllocated", | ||||
| 			true, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 			[]v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					Name:               "c1", | ||||
| 					ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			&runtimeapi.LinuxContainerResources{CpuShares: 204, MemoryLimitInBytes: 524288000, OomScoreAdj: -997}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"requests & limits, cpu & memory, burstable qos - container status with resourcesAllocated", | ||||
| 			true, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m"), v1.ResourceMemory: resource.MustParse("750Mi")}, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 			[]v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					Name:               "c1", | ||||
| 					ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 786432000, OomScoreAdj: 970}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"requests & limits, cpu & memory, guaranteed qos - no container status", | ||||
| 			false, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 			[]v1.ContainerStatus{}, | ||||
| 			&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 524288000, OomScoreAdj: -997}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"requests & limits, cpu & memory, burstable qos - container status with resourcesAllocated", | ||||
| 			false, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m"), v1.ResourceMemory: resource.MustParse("750Mi")}, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 			[]v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					Name:               "c1", | ||||
| 					ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 786432000, OomScoreAdj: 970}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"requests & limits, cpu & memory, guaranteed qos - container status with resourcesAllocated", | ||||
| 			false, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 			v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 			[]v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					Name:               "c1", | ||||
| 					ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			&runtimeapi.LinuxContainerResources{CpuShares: 204, MemoryLimitInBytes: 524288000, OomScoreAdj: -997}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"best-effort qos - no container status", | ||||
| 			false, | ||||
| 			nil, | ||||
| 			nil, | ||||
| 			[]v1.ContainerStatus{}, | ||||
| 			&runtimeapi.LinuxContainerResources{CpuShares: 2, OomScoreAdj: 1000}, | ||||
| 		}, | ||||
| 	} { | ||||
| 		t.Run(tc.name, func(t *testing.T) { | ||||
| 			if tc.scalingFg { | ||||
| 				defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)() | ||||
| 			} | ||||
| 			tc.expected.HugepageLimits = []*runtimeapi.HugepageLimit{{PageSize: "2MB", Limit: 0}, {PageSize: "1GB", Limit: 0}} | ||||
| 			pod.Spec.Containers[0].Resources = v1.ResourceRequirements{Limits: tc.limits, Requests: tc.requests} | ||||
| 			if len(tc.cStatus) > 0 { | ||||
| 				pod.Status.ContainerStatuses = tc.cStatus | ||||
| 			} | ||||
| 			resources := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[0], false) | ||||
| 			if diff.ObjectDiff(resources, tc.expected) != "" { | ||||
| 				t.Errorf("Test %s: expected resources %+v, but got %+v", tc.name, tc.expected, resources) | ||||
| 			} | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
|   | ||||
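The expected cgroup values in TestGenerateLinuxContainerResources above follow directly from the request-to-cgroup arithmetic: cpu.shares = milliCPU * 1024 / 1000 (with a floor of 2), and memory limits are the Mi values converted to bytes. The OomScoreAdj expectations track the QoS class seen in the table: -997 for Guaranteed, 1000 for BestEffort, and a request-dependent value for Burstable. Derived by hand (the helper name is illustrative only):

```go
// Deriving the expected cgroup numbers in the table above by hand.
func expectedCgroupValues() (int64, int64, int64, int64) {
	shares250m := int64(250) * 1024 / 1000 // 256 cpu.shares
	shares200m := int64(200) * 1024 / 1000 // 204 cpu.shares
	mem500Mi := int64(500) * 1024 * 1024   // 524288000 bytes
	mem750Mi := int64(750) * 1024 * 1024   // 786432000 bytes
	return shares250m, shares200m, mem500Mi, mem750Mi
}
```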
| @@ -28,6 +28,7 @@ import ( | ||||
| 	"github.com/google/go-cmp/cmp" | ||||
| 	"github.com/stretchr/testify/assert" | ||||
| 	"github.com/stretchr/testify/require" | ||||
| 	"k8s.io/apimachinery/pkg/api/resource" | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/apimachinery/pkg/types" | ||||
| 	"k8s.io/apimachinery/pkg/util/intstr" | ||||
| @@ -230,6 +231,111 @@ func TestToKubeContainerStatus(t *testing.T) { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // TestToKubeContainerStatusWithResources tests converting the CRI container status to | ||||
| // the internal type (i.e., toKubeContainerStatus()) for containers that report Resources. | ||||
| func TestToKubeContainerStatusWithResources(t *testing.T) { | ||||
| 	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)() | ||||
| 	cid := &kubecontainer.ContainerID{Type: "testRuntime", ID: "dummyid"} | ||||
| 	meta := &runtimeapi.ContainerMetadata{Name: "cname", Attempt: 3} | ||||
| 	imageSpec := &runtimeapi.ImageSpec{Image: "fimage"} | ||||
| 	var ( | ||||
| 		createdAt int64 = 327 | ||||
| 		startedAt int64 = 999 | ||||
| 	) | ||||
|  | ||||
| 	for desc, test := range map[string]struct { | ||||
| 		input    *runtimeapi.ContainerStatus | ||||
| 		expected *kubecontainer.Status | ||||
| 	}{ | ||||
| 		"container reporting cpu and memory": { | ||||
| 			input: &runtimeapi.ContainerStatus{ | ||||
| 				Id:        cid.ID, | ||||
| 				Metadata:  meta, | ||||
| 				Image:     imageSpec, | ||||
| 				State:     runtimeapi.ContainerState_CONTAINER_RUNNING, | ||||
| 				CreatedAt: createdAt, | ||||
| 				StartedAt: startedAt, | ||||
| 				Resources: &runtimeapi.ContainerResources{ | ||||
| 					Linux: &runtimeapi.LinuxContainerResources{ | ||||
| 						CpuQuota:           25000, | ||||
| 						CpuPeriod:          100000, | ||||
| 						MemoryLimitInBytes: 524288000, | ||||
| 						OomScoreAdj:        -998, | ||||
| 					}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			expected: &kubecontainer.Status{ | ||||
| 				ID:        *cid, | ||||
| 				Image:     imageSpec.Image, | ||||
| 				State:     kubecontainer.ContainerStateRunning, | ||||
| 				CreatedAt: time.Unix(0, createdAt), | ||||
| 				StartedAt: time.Unix(0, startedAt), | ||||
| 				Resources: &kubecontainer.ContainerResources{ | ||||
| 					CPULimit:    resource.NewMilliQuantity(250, resource.DecimalSI), | ||||
| 					MemoryLimit: resource.NewQuantity(524288000, resource.BinarySI), | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		"container reporting cpu only": { | ||||
| 			input: &runtimeapi.ContainerStatus{ | ||||
| 				Id:        cid.ID, | ||||
| 				Metadata:  meta, | ||||
| 				Image:     imageSpec, | ||||
| 				State:     runtimeapi.ContainerState_CONTAINER_RUNNING, | ||||
| 				CreatedAt: createdAt, | ||||
| 				StartedAt: startedAt, | ||||
| 				Resources: &runtimeapi.ContainerResources{ | ||||
| 					Linux: &runtimeapi.LinuxContainerResources{ | ||||
| 						CpuQuota:  50000, | ||||
| 						CpuPeriod: 100000, | ||||
| 					}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			expected: &kubecontainer.Status{ | ||||
| 				ID:        *cid, | ||||
| 				Image:     imageSpec.Image, | ||||
| 				State:     kubecontainer.ContainerStateRunning, | ||||
| 				CreatedAt: time.Unix(0, createdAt), | ||||
| 				StartedAt: time.Unix(0, startedAt), | ||||
| 				Resources: &kubecontainer.ContainerResources{ | ||||
| 					CPULimit: resource.NewMilliQuantity(500, resource.DecimalSI), | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		"container reporting memory only": { | ||||
| 			input: &runtimeapi.ContainerStatus{ | ||||
| 				Id:        cid.ID, | ||||
| 				Metadata:  meta, | ||||
| 				Image:     imageSpec, | ||||
| 				State:     runtimeapi.ContainerState_CONTAINER_RUNNING, | ||||
| 				CreatedAt: createdAt, | ||||
| 				StartedAt: startedAt, | ||||
| 				Resources: &runtimeapi.ContainerResources{ | ||||
| 					Linux: &runtimeapi.LinuxContainerResources{ | ||||
| 						MemoryLimitInBytes: 524288000, | ||||
| 						OomScoreAdj:        -998, | ||||
| 					}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			expected: &kubecontainer.Status{ | ||||
| 				ID:        *cid, | ||||
| 				Image:     imageSpec.Image, | ||||
| 				State:     kubecontainer.ContainerStateRunning, | ||||
| 				CreatedAt: time.Unix(0, createdAt), | ||||
| 				StartedAt: time.Unix(0, startedAt), | ||||
| 				Resources: &kubecontainer.ContainerResources{ | ||||
| 					MemoryLimit: resource.NewQuantity(524288000, resource.BinarySI), | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} { | ||||
| 		t.Run(desc, func(t *testing.T) { | ||||
| 			actual := toKubeContainerStatus(test.input, cid.Type) | ||||
| 			assert.Equal(t, test.expected, actual, desc) | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestLifeCycleHook(t *testing.T) { | ||||
|  | ||||
| 	// Setup | ||||
| @@ -696,3 +802,39 @@ func TestKillContainerGracePeriod(t *testing.T) { | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // TestUpdateContainerResources tests updating a container in a Pod. | ||||
| func TestUpdateContainerResources(t *testing.T) { | ||||
| 	fakeRuntime, _, m, errCreate := createTestRuntimeManager() | ||||
| 	require.NoError(t, errCreate) | ||||
| 	pod := &v1.Pod{ | ||||
| 		ObjectMeta: metav1.ObjectMeta{ | ||||
| 			UID:       "12345678", | ||||
| 			Name:      "bar", | ||||
| 			Namespace: "new", | ||||
| 		}, | ||||
| 		Spec: v1.PodSpec{ | ||||
| 			Containers: []v1.Container{ | ||||
| 				{ | ||||
| 					Name:            "foo", | ||||
| 					Image:           "busybox", | ||||
| 					ImagePullPolicy: v1.PullIfNotPresent, | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	// Create fake sandbox and container | ||||
| 	_, fakeContainers := makeAndSetFakePod(t, m, fakeRuntime, pod) | ||||
| 	assert.Equal(t, len(fakeContainers), 1) | ||||
|  | ||||
| 	cStatus, err := m.getPodContainerStatuses(pod.UID, pod.Name, pod.Namespace) | ||||
| 	assert.NoError(t, err) | ||||
| 	containerID := cStatus[0].ID | ||||
|  | ||||
| 	err = m.updateContainerResources(pod, &pod.Spec.Containers[0], containerID) | ||||
| 	assert.NoError(t, err) | ||||
|  | ||||
| 	// Verify container is updated | ||||
| 	assert.Contains(t, fakeRuntime.Called, "UpdateContainerResources") | ||||
| } | ||||
|   | ||||
| @@ -29,3 +29,12 @@ import ( | ||||
| func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config *runtimeapi.ContainerConfig, container *v1.Container, pod *v1.Pod, uid *int64, username string, nsTarget *kubecontainer.ContainerID) error { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // generateContainerResources generates platform specific container resources config for runtime | ||||
| func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, container *v1.Container) *runtimeapi.ContainerResources { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func toKubeContainerResources(statusResources *runtimeapi.ContainerResources) *kubecontainer.ContainerResources { | ||||
| 	return nil | ||||
| } | ||||
|   | ||||
| @@ -40,6 +40,12 @@ func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // generateContainerResources generates platform specific (windows) container resources config for runtime | ||||
| func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, container *v1.Container) *runtimeapi.ContainerResources { | ||||
| 	//TODO: Add windows support | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // generateWindowsContainerConfig generates windows container config for kubelet runtime v1. | ||||
| // Refer https://git.k8s.io/design-proposals-archive/node/cri-windows.md. | ||||
| func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1.Container, pod *v1.Pod, uid *int64, username string) (*runtimeapi.WindowsContainerConfig, error) { | ||||
| @@ -126,3 +132,8 @@ func calculateCPUMaximum(cpuLimit *resource.Quantity, cpuCount int64) int64 { | ||||
| 	} | ||||
| 	return cpuMaximum | ||||
| } | ||||
|  | ||||
| func toKubeContainerResources(statusResources *runtimeapi.ContainerResources) *kubecontainer.ContainerResources { | ||||
| 	//TODO: Add windows support | ||||
| 	return nil | ||||
| } | ||||
|   | ||||
| @@ -30,8 +30,10 @@ import ( | ||||
| 	"k8s.io/klog/v2" | ||||
|  | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	"k8s.io/apimachinery/pkg/api/resource" | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	kubetypes "k8s.io/apimachinery/pkg/types" | ||||
| 	"k8s.io/apimachinery/pkg/util/diff" | ||||
| 	utilruntime "k8s.io/apimachinery/pkg/util/runtime" | ||||
| 	utilversion "k8s.io/apimachinery/pkg/util/version" | ||||
| 	utilfeature "k8s.io/apiserver/pkg/util/feature" | ||||
| @@ -42,6 +44,7 @@ import ( | ||||
| 	internalapi "k8s.io/cri-api/pkg/apis" | ||||
| 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" | ||||
| 	"k8s.io/kubernetes/pkg/api/legacyscheme" | ||||
| 	podutil "k8s.io/kubernetes/pkg/api/v1/pod" | ||||
| 	"k8s.io/kubernetes/pkg/credentialprovider" | ||||
| 	"k8s.io/kubernetes/pkg/credentialprovider/plugin" | ||||
| 	"k8s.io/kubernetes/pkg/features" | ||||
| @@ -133,6 +136,9 @@ type kubeGenericRuntimeManager struct { | ||||
| 	// The directory path for seccomp profiles. | ||||
| 	seccompProfileRoot string | ||||
|  | ||||
| 	// Container management interface for pod container. | ||||
| 	containerManager cm.ContainerManager | ||||
|  | ||||
| 	// Internal lifecycle event handlers for container resource management. | ||||
| 	internalLifecycle cm.InternalContainerLifecycle | ||||
|  | ||||
| @@ -190,7 +196,7 @@ func NewKubeGenericRuntimeManager( | ||||
| 	cpuCFSQuotaPeriod metav1.Duration, | ||||
| 	runtimeService internalapi.RuntimeService, | ||||
| 	imageService internalapi.ImageManagerService, | ||||
| 	internalLifecycle cm.InternalContainerLifecycle, | ||||
| 	containerManager cm.ContainerManager, | ||||
| 	logManager logs.ContainerLogManager, | ||||
| 	runtimeClassManager *runtimeclass.Manager, | ||||
| 	seccompDefault bool, | ||||
| @@ -215,7 +221,8 @@ func NewKubeGenericRuntimeManager( | ||||
| 		runtimeHelper:          runtimeHelper, | ||||
| 		runtimeService:         runtimeService, | ||||
| 		imageService:           imageService, | ||||
| 		internalLifecycle:      internalLifecycle, | ||||
| 		containerManager:       containerManager, | ||||
| 		internalLifecycle:      containerManager.InternalContainerLifecycle(), | ||||
| 		logManager:             logManager, | ||||
| 		runtimeClassManager:    runtimeClassManager, | ||||
| 		logReduction:           logreduction.NewLogReduction(identicalErrorDelay), | ||||
| @@ -446,6 +453,26 @@ type containerToKillInfo struct { | ||||
| 	reason containerKillReason | ||||
| } | ||||
|  | ||||
| // containerResources holds the set of resources applicable to the running container | ||||
| type containerResources struct { | ||||
| 	memoryLimit   int64 | ||||
| 	memoryRequest int64 | ||||
| 	cpuLimit      int64 | ||||
| 	cpuRequest    int64 | ||||
| } | ||||
|  | ||||
| // containerToUpdateInfo contains necessary information to update a container's resources. | ||||
| type containerToUpdateInfo struct { | ||||
| 	// Index of the container in pod.Spec.Containers that needs resource update | ||||
| 	apiContainerIdx int | ||||
| 	// ID of the runtime container that needs resource update | ||||
| 	kubeContainerID kubecontainer.ContainerID | ||||
| 	// Desired resources for the running container | ||||
| 	desiredContainerResources containerResources | ||||
| 	// Most recently configured resources on the running container | ||||
| 	currentContainerResources *containerResources | ||||
| } | ||||
|  | ||||
| // podActions keeps information what to do for a pod. | ||||
| type podActions struct { | ||||
| 	// Stop all running (regular, init and ephemeral) containers and the sandbox for the pod. | ||||
| @@ -471,6 +498,11 @@ type podActions struct { | ||||
| 	// EphemeralContainersToStart is a list of indexes for the ephemeral containers to start, | ||||
| 	// where the index is the index of the specific container in pod.Spec.EphemeralContainers. | ||||
| 	EphemeralContainersToStart []int | ||||
| 	// ContainersToUpdate keeps a list of containers needing resource update. | ||||
| 	// Container resource update is applicable only for CPU and memory. | ||||
| 	ContainersToUpdate map[v1.ResourceName][]containerToUpdateInfo | ||||
| 	// UpdatePodResources is true if container(s) need resource update with restart | ||||
| 	UpdatePodResources bool | ||||
| } | ||||
|  | ||||
| func containerChanged(container *v1.Container, containerStatus *kubecontainer.Status) (uint64, uint64, bool) { | ||||
| @@ -490,6 +522,263 @@ func containerSucceeded(c *v1.Container, podStatus *kubecontainer.PodStatus) boo | ||||
| 	return cStatus.ExitCode == 0 | ||||
| } | ||||
|  | ||||
| func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containerIdx int, kubeContainerStatus *kubecontainer.Status, changes *podActions) bool { | ||||
| 	container := pod.Spec.Containers[containerIdx] | ||||
| 	if container.Resources.Limits == nil || len(pod.Status.ContainerStatuses) == 0 { | ||||
| 		return true | ||||
| 	} | ||||
|  | ||||
| 	// Determine if the *running* container needs resource update by comparing v1.Spec.Resources (desired) | ||||
| 	// with v1.Status.Resources / runtime.Status.Resources (last known actual). | ||||
| 	// Proceed only when the kubelet has accepted the resize, i.e. v1.Spec.Resources.Requests == v1.Status.ResourcesAllocated. | ||||
| 	// Skip if the runtime containerID doesn't match the pod.Status containerID (the container is restarting). | ||||
| 	apiContainerStatus, exists := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name) | ||||
| 	if !exists || apiContainerStatus.State.Running == nil || apiContainerStatus.Resources == nil || | ||||
| 		kubeContainerStatus.State != kubecontainer.ContainerStateRunning || | ||||
| 		kubeContainerStatus.ID.String() != apiContainerStatus.ContainerID || | ||||
| 		len(diff.ObjectDiff(container.Resources.Requests, apiContainerStatus.ResourcesAllocated)) != 0 { | ||||
| 		return true | ||||
| 	} | ||||
|  | ||||
| 	desiredMemoryLimit := container.Resources.Limits.Memory().Value() | ||||
| 	desiredCPULimit := container.Resources.Limits.Cpu().MilliValue() | ||||
| 	desiredCPURequest := container.Resources.Requests.Cpu().MilliValue() | ||||
| 	currentMemoryLimit := apiContainerStatus.Resources.Limits.Memory().Value() | ||||
| 	currentCPULimit := apiContainerStatus.Resources.Limits.Cpu().MilliValue() | ||||
| 	currentCPURequest := apiContainerStatus.Resources.Requests.Cpu().MilliValue() | ||||
| 	// Runtime container status resources (from CRI), if set, supersede v1 (API) container status resources. | ||||
| 	if kubeContainerStatus.Resources != nil { | ||||
| 		if kubeContainerStatus.Resources.MemoryLimit != nil { | ||||
| 			currentMemoryLimit = kubeContainerStatus.Resources.MemoryLimit.Value() | ||||
| 		} | ||||
| 		if kubeContainerStatus.Resources.CPULimit != nil { | ||||
| 			currentCPULimit = kubeContainerStatus.Resources.CPULimit.MilliValue() | ||||
| 		} | ||||
| 		if kubeContainerStatus.Resources.CPURequest != nil { | ||||
| 			currentCPURequest = kubeContainerStatus.Resources.CPURequest.MilliValue() | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Note: cgroups don't support a memory request today, so we don't compare it. If canAdmitPod, called during | ||||
| 	// handlePodResourcesResize, finds 'fit', then desiredMemoryRequest == currentMemoryRequest. | ||||
| 	if desiredMemoryLimit == currentMemoryLimit && desiredCPULimit == currentCPULimit && desiredCPURequest == currentCPURequest { | ||||
| 		return true | ||||
| 	} | ||||
|  | ||||
| 	desiredResources := containerResources{ | ||||
| 		memoryLimit:   desiredMemoryLimit, | ||||
| 		memoryRequest: apiContainerStatus.ResourcesAllocated.Memory().Value(), | ||||
| 		cpuLimit:      desiredCPULimit, | ||||
| 		cpuRequest:    desiredCPURequest, | ||||
| 	} | ||||
| 	currentResources := containerResources{ | ||||
| 		memoryLimit:   currentMemoryLimit, | ||||
| 		memoryRequest: apiContainerStatus.Resources.Requests.Memory().Value(), | ||||
| 		cpuLimit:      currentCPULimit, | ||||
| 		cpuRequest:    currentCPURequest, | ||||
| 	} | ||||
|  | ||||
| 	resizePolicy := make(map[v1.ResourceName]v1.ResourceResizePolicy) | ||||
| 	for _, pol := range container.ResizePolicy { | ||||
| 		resizePolicy[pol.ResourceName] = pol.Policy | ||||
| 	} | ||||
| 	determineContainerResize := func(rName v1.ResourceName, specValue, statusValue int64) (resize, restart bool) { | ||||
| 		if specValue == statusValue { | ||||
| 			return false, false | ||||
| 		} | ||||
| 		if resizePolicy[rName] == v1.RestartRequired { | ||||
| 			return true, true | ||||
| 		} | ||||
| 		return true, false | ||||
| 	} | ||||
| 	markContainerForUpdate := func(rName v1.ResourceName, specValue, statusValue int64) { | ||||
| 		cUpdateInfo := containerToUpdateInfo{ | ||||
| 			apiContainerIdx:           containerIdx, | ||||
| 			kubeContainerID:           kubeContainerStatus.ID, | ||||
| 			desiredContainerResources: desiredResources, | ||||
| 			currentContainerResources: &currentResources, | ||||
| 		} | ||||
| 		// Order the container updates such that resource decreases are applied before increases | ||||
| 		switch { | ||||
| 		case specValue > statusValue: // append | ||||
| 			changes.ContainersToUpdate[rName] = append(changes.ContainersToUpdate[rName], cUpdateInfo) | ||||
| 		case specValue < statusValue: // prepend | ||||
| 			changes.ContainersToUpdate[rName] = append(changes.ContainersToUpdate[rName], containerToUpdateInfo{}) | ||||
| 			copy(changes.ContainersToUpdate[rName][1:], changes.ContainersToUpdate[rName]) | ||||
| 			changes.ContainersToUpdate[rName][0] = cUpdateInfo | ||||
| 		} | ||||
| 	} | ||||
| 	resizeMemLim, restartMemLim := determineContainerResize(v1.ResourceMemory, desiredMemoryLimit, currentMemoryLimit) | ||||
| 	resizeCPULim, restartCPULim := determineContainerResize(v1.ResourceCPU, desiredCPULimit, currentCPULimit) | ||||
| 	resizeCPUReq, restartCPUReq := determineContainerResize(v1.ResourceCPU, desiredCPURequest, currentCPURequest) | ||||
| 	if restartCPULim || restartCPUReq || restartMemLim { | ||||
| 		// resize policy requires this container to restart | ||||
| 		changes.ContainersToKill[kubeContainerStatus.ID] = containerToKillInfo{ | ||||
| 			name:      kubeContainerStatus.Name, | ||||
| 			container: &pod.Spec.Containers[containerIdx], | ||||
| 			message:   fmt.Sprintf("Container %s resize requires restart", container.Name), | ||||
| 		} | ||||
| 		changes.ContainersToStart = append(changes.ContainersToStart, containerIdx) | ||||
| 		changes.UpdatePodResources = true | ||||
| 		return false | ||||
| 	} else { | ||||
| 		if resizeMemLim { | ||||
| 			markContainerForUpdate(v1.ResourceMemory, desiredMemoryLimit, currentMemoryLimit) | ||||
| 		} | ||||
| 		if resizeCPULim { | ||||
| 			markContainerForUpdate(v1.ResourceCPU, desiredCPULimit, currentCPULimit) | ||||
| 		} else if resizeCPUReq { | ||||
| 			markContainerForUpdate(v1.ResourceCPU, desiredCPURequest, currentCPURequest) | ||||
| 		} | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
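To make the per-resource decision above easier to follow, here is a minimal standalone Go sketch of the decision table that determineContainerResize encodes; resizePolicy and decideResize are hypothetical names, not the commit's types. An unchanged value means no action, a changed value under a RestartRequired policy means kill-and-recreate, and any other change is applied in place. Note also that markContainerForUpdate prepends decreases and appends increases, so resource reductions are applied ahead of increases within each resource's update list.

    package main

    import "fmt"

    // Hypothetical mirror of the per-resource resize decision table.
    type resizePolicy string

    const (
    	restartNotRequired resizePolicy = "RestartNotRequired"
    	restartRequired    resizePolicy = "RestartRequired"
    )

    // decideResize reports whether a value change needs an update and whether that update needs a restart.
    func decideResize(policy resizePolicy, desired, current int64) (update, restart bool) {
    	if desired == current {
    		return false, false // nothing to do
    	}
    	if policy == restartRequired {
    		return true, true // kill and recreate the container
    	}
    	return true, false // update the running container in place
    }

    func main() {
    	fmt.Println(decideResize(restartNotRequired, 200, 100)) // true false: in-place update
    	fmt.Println(decideResize(restartRequired, 200, 100))    // true true: restart required
    	fmt.Println(decideResize(restartRequired, 100, 100))    // false false: no action
    }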
| func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podStatus *kubecontainer.PodStatus, podContainerChanges podActions, result kubecontainer.PodSyncResult) { | ||||
| 	pcm := m.containerManager.NewPodContainerManager() | ||||
| 	//TODO(vinaykul,InPlacePodVerticalScaling): Figure out best way to get enforceMemoryQoS value (parameter #4 below) in platform-agnostic way | ||||
| 	podResources := cm.ResourceConfigForPod(pod, m.cpuCFSQuota, uint64((m.cpuCFSQuotaPeriod.Duration)/time.Microsecond), false) | ||||
| 	if podResources == nil { | ||||
| 		klog.ErrorS(nil, "Unable to get resource configuration", "pod", pod.Name) | ||||
| 		result.Fail(fmt.Errorf("Unable to get resource configuration processing resize for pod %s", pod.Name)) | ||||
| 		return | ||||
| 	} | ||||
| 	setPodCgroupConfig := func(rName v1.ResourceName, setLimitValue bool) error { | ||||
| 		var err error | ||||
| 		switch rName { | ||||
| 		case v1.ResourceCPU: | ||||
| 			if setLimitValue { | ||||
| 				err = pcm.SetPodCgroupCpuConfig(pod, podResources.CpuQuota, podResources.CpuPeriod, nil) | ||||
| 			} else { | ||||
| 				err = pcm.SetPodCgroupCpuConfig(pod, nil, podResources.CpuPeriod, podResources.CpuShares) | ||||
| 			} | ||||
| 		case v1.ResourceMemory: | ||||
| 			err = pcm.SetPodCgroupMemoryConfig(pod, *podResources.Memory) | ||||
| 		} | ||||
| 		if err != nil { | ||||
| 			klog.ErrorS(err, "Failed to set cgroup config", "resource", rName, "pod", pod.Name) | ||||
| 		} | ||||
| 		return err | ||||
| 	} | ||||
| 	// Memory and CPU are handled separately because the order of operations can differ for each resource. | ||||
| 	// If the resize results in a net pod resource increase, set the pod cgroup config before resizing containers. | ||||
| 	// If the resize results in a net pod resource decrease, set the pod cgroup config after resizing containers. | ||||
| 	// If an error occurs at any point, abort and let future SyncPod iterations retry the unfinished work. | ||||
| 	resizeContainers := func(rName v1.ResourceName, currPodCgLimValue, newPodCgLimValue, currPodCgReqValue, newPodCgReqValue int64) error { | ||||
| 		var err error | ||||
| 		if newPodCgLimValue > currPodCgLimValue { | ||||
| 			if err = setPodCgroupConfig(rName, true); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 		} | ||||
| 		if newPodCgReqValue > currPodCgReqValue { | ||||
| 			if err = setPodCgroupConfig(rName, false); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 		} | ||||
| 		if len(podContainerChanges.ContainersToUpdate[rName]) > 0 { | ||||
| 			if err = m.updatePodContainerResources(pod, rName, podContainerChanges.ContainersToUpdate[rName]); err != nil { | ||||
| 				klog.ErrorS(err, "updatePodContainerResources failed", "pod", format.Pod(pod), "resource", rName) | ||||
| 				return err | ||||
| 			} | ||||
| 		} | ||||
| 		if newPodCgLimValue < currPodCgLimValue { | ||||
| 			err = setPodCgroupConfig(rName, true) | ||||
| 		} | ||||
| 		if newPodCgReqValue < currPodCgReqValue { | ||||
| 			if err = setPodCgroupConfig(rName, false); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 		} | ||||
| 		return err | ||||
| 	} | ||||
| 	if len(podContainerChanges.ContainersToUpdate[v1.ResourceMemory]) > 0 || podContainerChanges.UpdatePodResources { | ||||
| 		currentPodMemoryLimit, err := pcm.GetPodCgroupMemoryConfig(pod) | ||||
| 		if err != nil { | ||||
| 			klog.ErrorS(err, "GetPodCgroupMemoryConfig failed", "pod", pod.Name) | ||||
| 			result.Fail(err) | ||||
| 			return | ||||
| 		} | ||||
| 		currentPodMemoryUsage, err := pcm.GetPodCgroupMemoryUsage(pod) | ||||
| 		if err != nil { | ||||
| 			klog.ErrorS(err, "GetPodCgroupMemoryUsage failed", "pod", pod.Name) | ||||
| 			result.Fail(err) | ||||
| 			return | ||||
| 		} | ||||
| 		if currentPodMemoryUsage >= uint64(*podResources.Memory) { | ||||
| 			klog.ErrorS(nil, "Aborting attempt to set pod memory limit less than current memory usage", "pod", pod.Name) | ||||
| 			result.Fail(fmt.Errorf("Aborting attempt to set pod memory limit less than current memory usage for pod %s", pod.Name)) | ||||
| 			return | ||||
| 		} | ||||
| 		if errResize := resizeContainers(v1.ResourceMemory, int64(currentPodMemoryLimit), *podResources.Memory, 0, 0); errResize != nil { | ||||
| 			result.Fail(errResize) | ||||
| 			return | ||||
| 		} | ||||
| 	} | ||||
| 	if len(podContainerChanges.ContainersToUpdate[v1.ResourceCPU]) > 0 || podContainerChanges.UpdatePodResources { | ||||
| 		currentPodCpuQuota, _, currentPodCPUShares, err := pcm.GetPodCgroupCpuConfig(pod) | ||||
| 		if err != nil { | ||||
| 			klog.ErrorS(err, "GetPodCgroupCpuConfig failed", "pod", pod.Name) | ||||
| 			result.Fail(err) | ||||
| 			return | ||||
| 		} | ||||
| 		if errResize := resizeContainers(v1.ResourceCPU, currentPodCpuQuota, *podResources.CpuQuota, | ||||
| 			int64(currentPodCPUShares), int64(*podResources.CpuShares)); errResize != nil { | ||||
| 			result.Fail(errResize) | ||||
| 			return | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
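The comment block at the top of resizeContainers describes an ordering invariant that is easy to miss, so here is a standalone sketch of the grow-before / shrink-after rule, with applyPodLimit and applyContainerLimits as stand-ins for the pod cgroup write and the per-container CRI updates. The point of the ordering is that container limits never exceed the pod-level limit at any intermediate step.

    package main

    import "fmt"

    // resizeForResource sketches the ordering rule for one resource: raise the pod-level
    // cgroup before touching containers, and lower it only after containers have shrunk.
    func resizeForResource(currPodLimit, newPodLimit int64,
    	applyPodLimit func(int64) error, applyContainerLimits func() error) error {
    	if newPodLimit > currPodLimit { // net increase: grow the pod cgroup first
    		if err := applyPodLimit(newPodLimit); err != nil {
    			return err
    		}
    	}
    	if err := applyContainerLimits(); err != nil {
    		return err
    	}
    	if newPodLimit < currPodLimit { // net decrease: shrink the pod cgroup last
    		return applyPodLimit(newPodLimit)
    	}
    	return nil
    }

    func main() {
    	err := resizeForResource(100<<20, 200<<20,
    		func(v int64) error { fmt.Println("set pod cgroup limit to", v); return nil },
    		func() error { fmt.Println("update container limits via CRI"); return nil })
    	fmt.Println("err:", err)
    }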
| func (m *kubeGenericRuntimeManager) updatePodContainerResources(pod *v1.Pod, resourceName v1.ResourceName, containersToUpdate []containerToUpdateInfo) error { | ||||
| 	klog.V(5).InfoS("Updating container resources", "pod", klog.KObj(pod)) | ||||
|  | ||||
| 	for _, cInfo := range containersToUpdate { | ||||
| 		container := pod.Spec.Containers[cInfo.apiContainerIdx].DeepCopy() | ||||
| 		// If updating memory limit, use most recently configured CPU request and limit values. | ||||
| 		// If updating CPU request and limit, use most recently configured memory request and limit values. | ||||
| 		switch resourceName { | ||||
| 		case v1.ResourceMemory: | ||||
| 			container.Resources.Limits = v1.ResourceList{ | ||||
| 				v1.ResourceCPU:    *resource.NewMilliQuantity(cInfo.currentContainerResources.cpuLimit, resource.DecimalSI), | ||||
| 				v1.ResourceMemory: *resource.NewQuantity(cInfo.desiredContainerResources.memoryLimit, resource.BinarySI), | ||||
| 			} | ||||
| 			container.Resources.Requests = v1.ResourceList{ | ||||
| 				v1.ResourceCPU:    *resource.NewMilliQuantity(cInfo.currentContainerResources.cpuRequest, resource.DecimalSI), | ||||
| 				v1.ResourceMemory: *resource.NewQuantity(cInfo.desiredContainerResources.memoryRequest, resource.BinarySI), | ||||
| 			} | ||||
| 		case v1.ResourceCPU: | ||||
| 			container.Resources.Limits = v1.ResourceList{ | ||||
| 				v1.ResourceCPU:    *resource.NewMilliQuantity(cInfo.desiredContainerResources.cpuLimit, resource.DecimalSI), | ||||
| 				v1.ResourceMemory: *resource.NewQuantity(cInfo.currentContainerResources.memoryLimit, resource.BinarySI), | ||||
| 			} | ||||
| 			container.Resources.Requests = v1.ResourceList{ | ||||
| 				v1.ResourceCPU:    *resource.NewMilliQuantity(cInfo.desiredContainerResources.cpuRequest, resource.DecimalSI), | ||||
| 				v1.ResourceMemory: *resource.NewQuantity(cInfo.currentContainerResources.memoryRequest, resource.BinarySI), | ||||
| 			} | ||||
| 		} | ||||
| 		if err := m.updateContainerResources(pod, container, cInfo.kubeContainerID); err != nil { | ||||
| 			// Log error and abort as container updates need to succeed in the order determined by computePodResizeAction. | ||||
| 			// The recovery path is for SyncPod to keep retrying at later times until it succeeds. | ||||
| 			klog.ErrorS(err, "updateContainerResources failed", "container", container.Name, "cID", cInfo.kubeContainerID, | ||||
| 				"pod", format.Pod(pod), "resourceName", resourceName) | ||||
| 			return err | ||||
| 		} | ||||
| 		// If UpdateContainerResources returned no error, the desired values for 'resourceName' were accepted by the runtime. | ||||
| 		// Update currentContainerResources for 'resourceName', which is our view of the most recently configured resources. | ||||
| 		// Note: we can't rely on GetPodStatus because the runtime may lag in actuating the resource values it just accepted. | ||||
| 		switch resourceName { | ||||
| 		case v1.ResourceMemory: | ||||
| 			cInfo.currentContainerResources.memoryLimit = cInfo.desiredContainerResources.memoryLimit | ||||
| 			cInfo.currentContainerResources.memoryRequest = cInfo.desiredContainerResources.memoryRequest | ||||
| 		case v1.ResourceCPU: | ||||
| 			cInfo.currentContainerResources.cpuLimit = cInfo.desiredContainerResources.cpuLimit | ||||
| 			cInfo.currentContainerResources.cpuRequest = cInfo.desiredContainerResources.cpuRequest | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
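As a quick illustration of the "carry the other resource unchanged" rule that updatePodContainerResources applies, here is a small self-contained sketch; containerRes and mergeForUpdate are hypothetical names chosen for the example, not the commit's types.

    package main

    import "fmt"

    type containerRes struct {
    	cpuMilli int64
    	memBytes int64
    }

    // mergeForUpdate builds the values sent to the runtime when a single resource is resized:
    // the resource being resized takes its desired value, while the other keeps its most
    // recently configured value so the update never touches it.
    func mergeForUpdate(resource string, desired, current containerRes) containerRes {
    	switch resource {
    	case "cpu":
    		return containerRes{cpuMilli: desired.cpuMilli, memBytes: current.memBytes}
    	case "memory":
    		return containerRes{cpuMilli: current.cpuMilli, memBytes: desired.memBytes}
    	}
    	return current
    }

    func main() {
    	desired := containerRes{cpuMilli: 200, memBytes: 200 << 20}
    	current := containerRes{cpuMilli: 100, memBytes: 100 << 20}
    	fmt.Printf("%+v\n", mergeForUpdate("cpu", desired, current)) // cpu updated, memory carried over
    }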
| // computePodActions checks whether the pod spec has changed and returns the changes if true. | ||||
| func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *kubecontainer.PodStatus) podActions { | ||||
| 	klog.V(5).InfoS("Syncing Pod", "pod", klog.KObj(pod)) | ||||
| @@ -582,6 +871,14 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku | ||||
| 		return changes | ||||
| 	} | ||||
|  | ||||
| 	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { | ||||
| 		changes.ContainersToUpdate = make(map[v1.ResourceName][]containerToUpdateInfo) | ||||
| 		latestPodStatus, err := m.GetPodStatus(podStatus.ID, pod.Name, pod.Namespace) | ||||
| 		if err == nil { | ||||
| 			podStatus = latestPodStatus | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Number of running containers to keep. | ||||
| 	keepCount := 0 | ||||
| 	// check the status of containers. | ||||
| @@ -623,7 +920,10 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku | ||||
| 		var message string | ||||
| 		var reason containerKillReason | ||||
| 		restart := shouldRestartOnFailure(pod) | ||||
| 		if _, _, changed := containerChanged(&container, containerStatus); changed { | ||||
| 		// Do not restart if only the Resources field has changed with InPlacePodVerticalScaling enabled | ||||
| 		if _, _, changed := containerChanged(&container, containerStatus); changed && | ||||
| 			(!utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) || | ||||
| 				kubecontainer.HashContainerWithoutResources(&container) != containerStatus.HashWithoutResources) { | ||||
| 			message = fmt.Sprintf("Container %s definition changed", container.Name) | ||||
| 			// Restart regardless of the restart policy because the container | ||||
| 			// spec changed. | ||||
| @@ -636,6 +936,10 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku | ||||
| 			// If the container failed the startup probe, we should kill it. | ||||
| 			message = fmt.Sprintf("Container %s failed startup probe", container.Name) | ||||
| 			reason = reasonStartupProbe | ||||
| 		} else if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) && | ||||
| 			!m.computePodResizeAction(pod, idx, containerStatus, &changes) { | ||||
| 			// computePodResizeAction updates 'changes' if resize policy requires restarting this container | ||||
| 			continue | ||||
| 		} else { | ||||
| 			// Keep the container. | ||||
| 			keepCount++ | ||||
| @@ -674,7 +978,8 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku | ||||
| //  4. Create sandbox if necessary. | ||||
| //  5. Create ephemeral containers. | ||||
| //  6. Create init containers. | ||||
| //  7. Create normal containers. | ||||
| //  7. Resize running containers (if InPlacePodVerticalScaling==true) | ||||
| //  8. Create normal containers. | ||||
| func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) { | ||||
| 	// Step 1: Compute sandbox and container changes. | ||||
| 	podContainerChanges := m.computePodActions(pod, podStatus) | ||||
| @@ -903,7 +1208,14 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po | ||||
| 		klog.V(4).InfoS("Completed init container for pod", "containerName", container.Name, "pod", klog.KObj(pod)) | ||||
| 	} | ||||
|  | ||||
| 	// Step 7: start containers in podContainerChanges.ContainersToStart. | ||||
| 	// Step 7: For containers in podContainerChanges.ContainersToUpdate[CPU,Memory] list, invoke UpdateContainerResources | ||||
| 	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { | ||||
| 		if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources { | ||||
| 			m.doPodResizeAction(pod, podStatus, podContainerChanges, result) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Step 8: start containers in podContainerChanges.ContainersToStart. | ||||
| 	for _, idx := range podContainerChanges.ContainersToStart { | ||||
| 		start(ctx, "container", metrics.Container, containerStartSpec(&pod.Spec.Containers[idx])) | ||||
| 	} | ||||
| @@ -1096,7 +1408,6 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(ctx context.Context, uid kubety | ||||
| 	} | ||||
|  | ||||
| 	m.logReduction.ClearID(podFullName) | ||||
|  | ||||
| 	return &kubecontainer.PodStatus{ | ||||
| 		ID:                uid, | ||||
| 		Name:              name, | ||||
|   | ||||
| @@ -37,11 +37,14 @@ import ( | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/apimachinery/pkg/types" | ||||
| 	"k8s.io/apimachinery/pkg/util/sets" | ||||
| 	utilfeature "k8s.io/apiserver/pkg/util/feature" | ||||
| 	"k8s.io/client-go/util/flowcontrol" | ||||
| 	featuregatetesting "k8s.io/component-base/featuregate/testing" | ||||
| 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" | ||||
| 	apitest "k8s.io/cri-api/pkg/apis/testing" | ||||
| 	podutil "k8s.io/kubernetes/pkg/api/v1/pod" | ||||
| 	"k8s.io/kubernetes/pkg/credentialprovider" | ||||
| 	"k8s.io/kubernetes/pkg/features" | ||||
| 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" | ||||
| 	containertest "k8s.io/kubernetes/pkg/kubelet/container/testing" | ||||
| 	proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results" | ||||
| @@ -861,6 +864,28 @@ func makeBasePodAndStatus() (*v1.Pod, *kubecontainer.PodStatus) { | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		Status: v1.PodStatus{ | ||||
| 			ContainerStatuses: []v1.ContainerStatus{ | ||||
| 				{ | ||||
| 					ContainerID: "://id1", | ||||
| 					Name:        "foo1", | ||||
| 					Image:       "busybox", | ||||
| 					State:       v1.ContainerState{Running: &v1.ContainerStateRunning{}}, | ||||
| 				}, | ||||
| 				{ | ||||
| 					ContainerID: "://id2", | ||||
| 					Name:        "foo2", | ||||
| 					Image:       "busybox", | ||||
| 					State:       v1.ContainerState{Running: &v1.ContainerStateRunning{}}, | ||||
| 				}, | ||||
| 				{ | ||||
| 					ContainerID: "://id3", | ||||
| 					Name:        "foo3", | ||||
| 					Image:       "busybox", | ||||
| 					State:       v1.ContainerState{Running: &v1.ContainerStateRunning{}}, | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
| 	status := &kubecontainer.PodStatus{ | ||||
| 		ID:        pod.UID, | ||||
| @@ -1615,3 +1640,466 @@ func makeBasePodAndStatusWithInitAndEphemeralContainers() (*v1.Pod, *kubecontain | ||||
| 	}) | ||||
| 	return pod, status | ||||
| } | ||||
|  | ||||
| func TestComputePodActionsForPodResize(t *testing.T) { | ||||
| 	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)() | ||||
| 	fakeRuntime, _, m, err := createTestRuntimeManager() | ||||
| 	m.machineInfo.MemoryCapacity = 17179860387 // 16GB | ||||
| 	assert.NoError(t, err) | ||||
|  | ||||
| 	cpu100m := resource.MustParse("100m") | ||||
| 	cpu200m := resource.MustParse("200m") | ||||
| 	mem100M := resource.MustParse("100Mi") | ||||
| 	mem200M := resource.MustParse("200Mi") | ||||
| 	cpuPolicyRestartNotRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceCPU, Policy: v1.RestartNotRequired} | ||||
| 	memPolicyRestartNotRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, Policy: v1.RestartNotRequired} | ||||
| 	cpuPolicyRestartRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceCPU, Policy: v1.RestartRequired} | ||||
| 	memPolicyRestartRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, Policy: v1.RestartRequired} | ||||
|  | ||||
| 	for desc, test := range map[string]struct { | ||||
| 		podResizePolicyFn       func(*v1.Pod) | ||||
| 		mutatePodFn             func(*v1.Pod) | ||||
| 		getExpectedPodActionsFn func(*v1.Pod, *kubecontainer.PodStatus) *podActions | ||||
| 	}{ | ||||
| 		"Update container CPU and memory resources": { | ||||
| 			mutatePodFn: func(pod *v1.Pod) { | ||||
| 				pod.Spec.Containers[1].Resources = v1.ResourceRequirements{ | ||||
| 					Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M}, | ||||
| 				} | ||||
| 				if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[1].Name); found { | ||||
| 					pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{ | ||||
| 						Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M}, | ||||
| 					} | ||||
| 				} | ||||
| 			}, | ||||
| 			getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions { | ||||
| 				kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[1].Name) | ||||
| 				pa := podActions{ | ||||
| 					SandboxID:         podStatus.SandboxStatuses[0].Id, | ||||
| 					ContainersToStart: []int{}, | ||||
| 					ContainersToKill:  getKillMap(pod, podStatus, []int{}), | ||||
| 					ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{ | ||||
| 						v1.ResourceMemory: { | ||||
| 							{ | ||||
| 								apiContainerIdx: 1, | ||||
| 								kubeContainerID: kcs.ID, | ||||
| 								desiredContainerResources: containerResources{ | ||||
| 									memoryLimit: mem100M.Value(), | ||||
| 									cpuLimit:    cpu100m.MilliValue(), | ||||
| 								}, | ||||
| 								currentContainerResources: &containerResources{ | ||||
| 									memoryLimit: mem200M.Value(), | ||||
| 									cpuLimit:    cpu200m.MilliValue(), | ||||
| 								}, | ||||
| 							}, | ||||
| 						}, | ||||
| 						v1.ResourceCPU: { | ||||
| 							{ | ||||
| 								apiContainerIdx: 1, | ||||
| 								kubeContainerID: kcs.ID, | ||||
| 								desiredContainerResources: containerResources{ | ||||
| 									memoryLimit: mem100M.Value(), | ||||
| 									cpuLimit:    cpu100m.MilliValue(), | ||||
| 								}, | ||||
| 								currentContainerResources: &containerResources{ | ||||
| 									memoryLimit: mem200M.Value(), | ||||
| 									cpuLimit:    cpu200m.MilliValue(), | ||||
| 								}, | ||||
| 							}, | ||||
| 						}, | ||||
| 					}, | ||||
| 				} | ||||
| 				return &pa | ||||
| 			}, | ||||
| 		}, | ||||
| 		"Update container CPU resources": { | ||||
| 			mutatePodFn: func(pod *v1.Pod) { | ||||
| 				pod.Spec.Containers[1].Resources = v1.ResourceRequirements{ | ||||
| 					Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M}, | ||||
| 				} | ||||
| 				if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[1].Name); found { | ||||
| 					pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{ | ||||
| 						Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem100M}, | ||||
| 					} | ||||
| 				} | ||||
| 			}, | ||||
| 			getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions { | ||||
| 				kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[1].Name) | ||||
| 				pa := podActions{ | ||||
| 					SandboxID:         podStatus.SandboxStatuses[0].Id, | ||||
| 					ContainersToStart: []int{}, | ||||
| 					ContainersToKill:  getKillMap(pod, podStatus, []int{}), | ||||
| 					ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{ | ||||
| 						v1.ResourceCPU: { | ||||
| 							{ | ||||
| 								apiContainerIdx: 1, | ||||
| 								kubeContainerID: kcs.ID, | ||||
| 								desiredContainerResources: containerResources{ | ||||
| 									memoryLimit: mem100M.Value(), | ||||
| 									cpuLimit:    cpu100m.MilliValue(), | ||||
| 								}, | ||||
| 								currentContainerResources: &containerResources{ | ||||
| 									memoryLimit: mem100M.Value(), | ||||
| 									cpuLimit:    cpu200m.MilliValue(), | ||||
| 								}, | ||||
| 							}, | ||||
| 						}, | ||||
| 					}, | ||||
| 				} | ||||
| 				return &pa | ||||
| 			}, | ||||
| 		}, | ||||
| 		"Update container memory resources": { | ||||
| 			mutatePodFn: func(pod *v1.Pod) { | ||||
| 				pod.Spec.Containers[2].Resources = v1.ResourceRequirements{ | ||||
| 					Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M}, | ||||
| 				} | ||||
| 				if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[2].Name); found { | ||||
| 					pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{ | ||||
| 						Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem100M}, | ||||
| 					} | ||||
| 				} | ||||
| 			}, | ||||
| 			getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions { | ||||
| 				kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[2].Name) | ||||
| 				pa := podActions{ | ||||
| 					SandboxID:         podStatus.SandboxStatuses[0].Id, | ||||
| 					ContainersToStart: []int{}, | ||||
| 					ContainersToKill:  getKillMap(pod, podStatus, []int{}), | ||||
| 					ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{ | ||||
| 						v1.ResourceMemory: { | ||||
| 							{ | ||||
| 								apiContainerIdx: 2, | ||||
| 								kubeContainerID: kcs.ID, | ||||
| 								desiredContainerResources: containerResources{ | ||||
| 									memoryLimit: mem200M.Value(), | ||||
| 									cpuLimit:    cpu200m.MilliValue(), | ||||
| 								}, | ||||
| 								currentContainerResources: &containerResources{ | ||||
| 									memoryLimit: mem100M.Value(), | ||||
| 									cpuLimit:    cpu200m.MilliValue(), | ||||
| 								}, | ||||
| 							}, | ||||
| 						}, | ||||
| 					}, | ||||
| 				} | ||||
| 				return &pa | ||||
| 			}, | ||||
| 		}, | ||||
| 		"Nothing when spec.Resources and status.Resources are equal": { | ||||
| 			mutatePodFn: func(pod *v1.Pod) { | ||||
| 				pod.Spec.Containers[1].Resources = v1.ResourceRequirements{ | ||||
| 					Limits: v1.ResourceList{v1.ResourceCPU: cpu200m}, | ||||
| 				} | ||||
| 				pod.Status.ContainerStatuses[1].Resources = &v1.ResourceRequirements{ | ||||
| 					Limits: v1.ResourceList{v1.ResourceCPU: cpu200m}, | ||||
| 				} | ||||
| 			}, | ||||
| 			getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions { | ||||
| 				pa := podActions{ | ||||
| 					SandboxID:          podStatus.SandboxStatuses[0].Id, | ||||
| 					ContainersToKill:   getKillMap(pod, podStatus, []int{}), | ||||
| 					ContainersToStart:  []int{}, | ||||
| 					ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{}, | ||||
| 				} | ||||
| 				return &pa | ||||
| 			}, | ||||
| 		}, | ||||
| 		"Update container CPU and memory resources with Restart policy for CPU": { | ||||
| 			podResizePolicyFn: func(pod *v1.Pod) { | ||||
| 				pod.Spec.Containers[0].ResizePolicy = []v1.ContainerResizePolicy{cpuPolicyRestartRequired, memPolicyRestartNotRequired} | ||||
| 			}, | ||||
| 			mutatePodFn: func(pod *v1.Pod) { | ||||
| 				pod.Spec.Containers[0].Resources = v1.ResourceRequirements{ | ||||
| 					Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M}, | ||||
| 				} | ||||
| 				if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[0].Name); found { | ||||
| 					pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{ | ||||
| 						Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M}, | ||||
| 					} | ||||
| 				} | ||||
| 			}, | ||||
| 			getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions { | ||||
| 				kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[0].Name) | ||||
| 				killMap := make(map[kubecontainer.ContainerID]containerToKillInfo) | ||||
| 				killMap[kcs.ID] = containerToKillInfo{ | ||||
| 					container: &pod.Spec.Containers[0], | ||||
| 					name:      pod.Spec.Containers[0].Name, | ||||
| 				} | ||||
| 				pa := podActions{ | ||||
| 					SandboxID:          podStatus.SandboxStatuses[0].Id, | ||||
| 					ContainersToStart:  []int{0}, | ||||
| 					ContainersToKill:   killMap, | ||||
| 					ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{}, | ||||
| 					UpdatePodResources: true, | ||||
| 				} | ||||
| 				return &pa | ||||
| 			}, | ||||
| 		}, | ||||
| 		"Update container CPU and memory resources with Restart policy for memory": { | ||||
| 			podResizePolicyFn: func(pod *v1.Pod) { | ||||
| 				pod.Spec.Containers[2].ResizePolicy = []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartRequired} | ||||
| 			}, | ||||
| 			mutatePodFn: func(pod *v1.Pod) { | ||||
| 				pod.Spec.Containers[2].Resources = v1.ResourceRequirements{ | ||||
| 					Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M}, | ||||
| 				} | ||||
| 				if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[2].Name); found { | ||||
| 					pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{ | ||||
| 						Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M}, | ||||
| 					} | ||||
| 				} | ||||
| 			}, | ||||
| 			getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions { | ||||
| 				kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[2].Name) | ||||
| 				killMap := make(map[kubecontainer.ContainerID]containerToKillInfo) | ||||
| 				killMap[kcs.ID] = containerToKillInfo{ | ||||
| 					container: &pod.Spec.Containers[2], | ||||
| 					name:      pod.Spec.Containers[2].Name, | ||||
| 				} | ||||
| 				pa := podActions{ | ||||
| 					SandboxID:          podStatus.SandboxStatuses[0].Id, | ||||
| 					ContainersToStart:  []int{2}, | ||||
| 					ContainersToKill:   killMap, | ||||
| 					ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{}, | ||||
| 					UpdatePodResources: true, | ||||
| 				} | ||||
| 				return &pa | ||||
| 			}, | ||||
| 		}, | ||||
| 		"Update container memory resources with Restart policy for CPU": { | ||||
| 			podResizePolicyFn: func(pod *v1.Pod) { | ||||
| 				pod.Spec.Containers[1].ResizePolicy = []v1.ContainerResizePolicy{cpuPolicyRestartRequired, memPolicyRestartNotRequired} | ||||
| 			}, | ||||
| 			mutatePodFn: func(pod *v1.Pod) { | ||||
| 				pod.Spec.Containers[1].Resources = v1.ResourceRequirements{ | ||||
| 					Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem200M}, | ||||
| 				} | ||||
| 				if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[1].Name); found { | ||||
| 					pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{ | ||||
| 						Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M}, | ||||
| 					} | ||||
| 				} | ||||
| 			}, | ||||
| 			getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions { | ||||
| 				kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[1].Name) | ||||
| 				pa := podActions{ | ||||
| 					SandboxID:         podStatus.SandboxStatuses[0].Id, | ||||
| 					ContainersToStart: []int{}, | ||||
| 					ContainersToKill:  getKillMap(pod, podStatus, []int{}), | ||||
| 					ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{ | ||||
| 						v1.ResourceMemory: { | ||||
| 							{ | ||||
| 								apiContainerIdx: 1, | ||||
| 								kubeContainerID: kcs.ID, | ||||
| 								desiredContainerResources: containerResources{ | ||||
| 									memoryLimit: mem200M.Value(), | ||||
| 									cpuLimit:    cpu100m.MilliValue(), | ||||
| 								}, | ||||
| 								currentContainerResources: &containerResources{ | ||||
| 									memoryLimit: mem100M.Value(), | ||||
| 									cpuLimit:    cpu100m.MilliValue(), | ||||
| 								}, | ||||
| 							}, | ||||
| 						}, | ||||
| 					}, | ||||
| 				} | ||||
| 				return &pa | ||||
| 			}, | ||||
| 		}, | ||||
| 		"Update container CPU resources with Restart policy for memory": { | ||||
| 			podResizePolicyFn: func(pod *v1.Pod) { | ||||
| 				pod.Spec.Containers[2].ResizePolicy = []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartRequired} | ||||
| 			}, | ||||
| 			mutatePodFn: func(pod *v1.Pod) { | ||||
| 				pod.Spec.Containers[2].Resources = v1.ResourceRequirements{ | ||||
| 					Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem100M}, | ||||
| 				} | ||||
| 				if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[2].Name); found { | ||||
| 					pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{ | ||||
| 						Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M}, | ||||
| 					} | ||||
| 				} | ||||
| 			}, | ||||
| 			getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions { | ||||
| 				kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[2].Name) | ||||
| 				pa := podActions{ | ||||
| 					SandboxID:         podStatus.SandboxStatuses[0].Id, | ||||
| 					ContainersToStart: []int{}, | ||||
| 					ContainersToKill:  getKillMap(pod, podStatus, []int{}), | ||||
| 					ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{ | ||||
| 						v1.ResourceCPU: { | ||||
| 							{ | ||||
| 								apiContainerIdx: 2, | ||||
| 								kubeContainerID: kcs.ID, | ||||
| 								desiredContainerResources: containerResources{ | ||||
| 									memoryLimit: mem100M.Value(), | ||||
| 									cpuLimit:    cpu200m.MilliValue(), | ||||
| 								}, | ||||
| 								currentContainerResources: &containerResources{ | ||||
| 									memoryLimit: mem100M.Value(), | ||||
| 									cpuLimit:    cpu100m.MilliValue(), | ||||
| 								}, | ||||
| 							}, | ||||
| 						}, | ||||
| 					}, | ||||
| 				} | ||||
| 				return &pa | ||||
| 			}, | ||||
| 		}, | ||||
| 	} { | ||||
| 		pod, kps := makeBasePodAndStatus() | ||||
| 		for idx := range pod.Spec.Containers { | ||||
| 			// default resize policy when pod resize feature is enabled | ||||
| 			pod.Spec.Containers[idx].ResizePolicy = []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartNotRequired} | ||||
| 		} | ||||
| 		if test.podResizePolicyFn != nil { | ||||
| 			test.podResizePolicyFn(pod) | ||||
| 		} | ||||
| 		for idx := range pod.Spec.Containers { | ||||
| 			// compute hash | ||||
| 			if kcs := kps.FindContainerStatusByName(pod.Spec.Containers[idx].Name); kcs != nil { | ||||
| 				kcs.Hash = kubecontainer.HashContainer(&pod.Spec.Containers[idx]) | ||||
| 				kcs.HashWithoutResources = kubecontainer.HashContainerWithoutResources(&pod.Spec.Containers[idx]) | ||||
| 			} | ||||
| 		} | ||||
| 		makeAndSetFakePod(t, m, fakeRuntime, pod) | ||||
| 		status, _ := m.GetPodStatus(kps.ID, pod.Name, pod.Namespace) | ||||
| 		for idx := range pod.Spec.Containers { | ||||
| 			if rcs := status.FindContainerStatusByName(pod.Spec.Containers[idx].Name); rcs != nil { | ||||
| 				if csIdx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[idx].Name); found { | ||||
| 					pod.Status.ContainerStatuses[csIdx].ContainerID = rcs.ID.String() | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		for idx := range pod.Spec.Containers { | ||||
| 			if kcs := kps.FindContainerStatusByName(pod.Spec.Containers[idx].Name); kcs != nil { | ||||
| 				kcs.Hash = kubecontainer.HashContainer(&pod.Spec.Containers[idx]) | ||||
| 				kcs.HashWithoutResources = kubecontainer.HashContainerWithoutResources(&pod.Spec.Containers[idx]) | ||||
| 			} | ||||
| 		} | ||||
| 		if test.mutatePodFn != nil { | ||||
| 			test.mutatePodFn(pod) | ||||
| 		} | ||||
| 		expectedActions := test.getExpectedPodActionsFn(pod, status) | ||||
| 		actions := m.computePodActions(pod, status) | ||||
| 		verifyActions(t, expectedActions, &actions, desc) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestUpdatePodContainerResources(t *testing.T) { | ||||
| 	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)() | ||||
| 	fakeRuntime, _, m, err := createTestRuntimeManager() | ||||
| 	m.machineInfo.MemoryCapacity = 17179860387 // 16GB | ||||
| 	assert.NoError(t, err) | ||||
|  | ||||
| 	cpu100m := resource.MustParse("100m") | ||||
| 	cpu150m := resource.MustParse("150m") | ||||
| 	cpu200m := resource.MustParse("200m") | ||||
| 	cpu250m := resource.MustParse("250m") | ||||
| 	cpu300m := resource.MustParse("300m") | ||||
| 	cpu350m := resource.MustParse("350m") | ||||
| 	mem100M := resource.MustParse("100Mi") | ||||
| 	mem150M := resource.MustParse("150Mi") | ||||
| 	mem200M := resource.MustParse("200Mi") | ||||
| 	mem250M := resource.MustParse("250Mi") | ||||
| 	mem300M := resource.MustParse("300Mi") | ||||
| 	mem350M := resource.MustParse("350Mi") | ||||
| 	res100m100Mi := v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M} | ||||
| 	res150m100Mi := v1.ResourceList{v1.ResourceCPU: cpu150m, v1.ResourceMemory: mem100M} | ||||
| 	res100m150Mi := v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem150M} | ||||
| 	res150m150Mi := v1.ResourceList{v1.ResourceCPU: cpu150m, v1.ResourceMemory: mem150M} | ||||
| 	res200m200Mi := v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M} | ||||
| 	res250m200Mi := v1.ResourceList{v1.ResourceCPU: cpu250m, v1.ResourceMemory: mem200M} | ||||
| 	res200m250Mi := v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem250M} | ||||
| 	res250m250Mi := v1.ResourceList{v1.ResourceCPU: cpu250m, v1.ResourceMemory: mem250M} | ||||
| 	res300m300Mi := v1.ResourceList{v1.ResourceCPU: cpu300m, v1.ResourceMemory: mem300M} | ||||
| 	res350m300Mi := v1.ResourceList{v1.ResourceCPU: cpu350m, v1.ResourceMemory: mem300M} | ||||
| 	res300m350Mi := v1.ResourceList{v1.ResourceCPU: cpu300m, v1.ResourceMemory: mem350M} | ||||
| 	res350m350Mi := v1.ResourceList{v1.ResourceCPU: cpu350m, v1.ResourceMemory: mem350M} | ||||
|  | ||||
| 	pod, _ := makeBasePodAndStatus() | ||||
| 	makeAndSetFakePod(t, m, fakeRuntime, pod) | ||||
|  | ||||
| 	for dsc, tc := range map[string]struct { | ||||
| 		resourceName            v1.ResourceName | ||||
| 		apiSpecResources        []v1.ResourceRequirements | ||||
| 		apiStatusResources      []v1.ResourceRequirements | ||||
| 		requiresRestart         []bool | ||||
| 		invokeUpdateResources   bool | ||||
| 		expectedCurrentLimits   []v1.ResourceList | ||||
| 		expectedCurrentRequests []v1.ResourceList | ||||
| 	}{ | ||||
| 		"Guaranteed QoS Pod - CPU & memory resize requested, update CPU": { | ||||
| 			resourceName: v1.ResourceCPU, | ||||
| 			apiSpecResources: []v1.ResourceRequirements{ | ||||
| 				{Limits: res150m150Mi, Requests: res150m150Mi}, | ||||
| 				{Limits: res250m250Mi, Requests: res250m250Mi}, | ||||
| 				{Limits: res350m350Mi, Requests: res350m350Mi}, | ||||
| 			}, | ||||
| 			apiStatusResources: []v1.ResourceRequirements{ | ||||
| 				{Limits: res100m100Mi, Requests: res100m100Mi}, | ||||
| 				{Limits: res200m200Mi, Requests: res200m200Mi}, | ||||
| 				{Limits: res300m300Mi, Requests: res300m300Mi}, | ||||
| 			}, | ||||
| 			requiresRestart:         []bool{false, false, false}, | ||||
| 			invokeUpdateResources:   true, | ||||
| 			expectedCurrentLimits:   []v1.ResourceList{res150m100Mi, res250m200Mi, res350m300Mi}, | ||||
| 			expectedCurrentRequests: []v1.ResourceList{res150m100Mi, res250m200Mi, res350m300Mi}, | ||||
| 		}, | ||||
| 		"Guaranteed QoS Pod - CPU & memory resize requested, update memory": { | ||||
| 			resourceName: v1.ResourceMemory, | ||||
| 			apiSpecResources: []v1.ResourceRequirements{ | ||||
| 				{Limits: res150m150Mi, Requests: res150m150Mi}, | ||||
| 				{Limits: res250m250Mi, Requests: res250m250Mi}, | ||||
| 				{Limits: res350m350Mi, Requests: res350m350Mi}, | ||||
| 			}, | ||||
| 			apiStatusResources: []v1.ResourceRequirements{ | ||||
| 				{Limits: res100m100Mi, Requests: res100m100Mi}, | ||||
| 				{Limits: res200m200Mi, Requests: res200m200Mi}, | ||||
| 				{Limits: res300m300Mi, Requests: res300m300Mi}, | ||||
| 			}, | ||||
| 			requiresRestart:         []bool{false, false, false}, | ||||
| 			invokeUpdateResources:   true, | ||||
| 			expectedCurrentLimits:   []v1.ResourceList{res100m150Mi, res200m250Mi, res300m350Mi}, | ||||
| 			expectedCurrentRequests: []v1.ResourceList{res100m150Mi, res200m250Mi, res300m350Mi}, | ||||
| 		}, | ||||
| 	} { | ||||
| 		var containersToUpdate []containerToUpdateInfo | ||||
| 		for idx := range pod.Spec.Containers { | ||||
| 			// set this container's spec and status resources from the test case | ||||
| 			pod.Spec.Containers[idx].Resources = tc.apiSpecResources[idx] | ||||
| 			pod.Status.ContainerStatuses[idx].Resources = &tc.apiStatusResources[idx] | ||||
| 			cInfo := containerToUpdateInfo{ | ||||
| 				apiContainerIdx: idx, | ||||
| 				kubeContainerID: kubecontainer.ContainerID{}, | ||||
| 				desiredContainerResources: containerResources{ | ||||
| 					memoryLimit:   tc.apiSpecResources[idx].Limits.Memory().Value(), | ||||
| 					memoryRequest: tc.apiSpecResources[idx].Requests.Memory().Value(), | ||||
| 					cpuLimit:      tc.apiSpecResources[idx].Limits.Cpu().MilliValue(), | ||||
| 					cpuRequest:    tc.apiSpecResources[idx].Requests.Cpu().MilliValue(), | ||||
| 				}, | ||||
| 				currentContainerResources: &containerResources{ | ||||
| 					memoryLimit:   tc.apiStatusResources[idx].Limits.Memory().Value(), | ||||
| 					memoryRequest: tc.apiStatusResources[idx].Requests.Memory().Value(), | ||||
| 					cpuLimit:      tc.apiStatusResources[idx].Limits.Cpu().MilliValue(), | ||||
| 					cpuRequest:    tc.apiStatusResources[idx].Requests.Cpu().MilliValue(), | ||||
| 				}, | ||||
| 			} | ||||
| 			containersToUpdate = append(containersToUpdate, cInfo) | ||||
| 		} | ||||
| 		fakeRuntime.Called = []string{} | ||||
| 		err := m.updatePodContainerResources(pod, tc.resourceName, containersToUpdate) | ||||
| 		assert.NoError(t, err, dsc) | ||||
|  | ||||
| 		if tc.invokeUpdateResources { | ||||
| 			assert.Contains(t, fakeRuntime.Called, "UpdateContainerResources", dsc) | ||||
| 		} | ||||
| 		for idx := range pod.Spec.Containers { | ||||
| 			assert.Equal(t, tc.expectedCurrentLimits[idx].Memory().Value(), containersToUpdate[idx].currentContainerResources.memoryLimit, dsc) | ||||
| 			assert.Equal(t, tc.expectedCurrentRequests[idx].Memory().Value(), containersToUpdate[idx].currentContainerResources.memoryRequest, dsc) | ||||
| 			assert.Equal(t, tc.expectedCurrentLimits[idx].Cpu().MilliValue(), containersToUpdate[idx].currentContainerResources.cpuLimit, dsc) | ||||
| 			assert.Equal(t, tc.expectedCurrentRequests[idx].Cpu().MilliValue(), containersToUpdate[idx].currentContainerResources.cpuRequest, dsc) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|   | ||||
| @@ -22,7 +22,9 @@ import ( | ||||
|  | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	kubetypes "k8s.io/apimachinery/pkg/types" | ||||
| 	utilfeature "k8s.io/apiserver/pkg/util/feature" | ||||
| 	"k8s.io/klog/v2" | ||||
| 	"k8s.io/kubernetes/pkg/features" | ||||
| 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/types" | ||||
| ) | ||||
| @@ -33,6 +35,7 @@ const ( | ||||
| 	podTerminationGracePeriodLabel = "io.kubernetes.pod.terminationGracePeriod" | ||||
|  | ||||
| 	containerHashLabel                     = "io.kubernetes.container.hash" | ||||
| 	containerHashWithoutResourcesLabel     = "io.kubernetes.container.hashWithoutResources" | ||||
| 	containerRestartCountLabel             = "io.kubernetes.container.restartCount" | ||||
| 	containerTerminationMessagePathLabel   = "io.kubernetes.container.terminationMessagePath" | ||||
| 	containerTerminationMessagePolicyLabel = "io.kubernetes.container.terminationMessagePolicy" | ||||
| @@ -62,6 +65,7 @@ type labeledContainerInfo struct { | ||||
|  | ||||
| type annotatedContainerInfo struct { | ||||
| 	Hash                      uint64 | ||||
| 	HashWithoutResources      uint64 | ||||
| 	RestartCount              int | ||||
| 	PodDeletionGracePeriod    *int64 | ||||
| 	PodTerminationGracePeriod *int64 | ||||
| @@ -113,6 +117,9 @@ func newContainerAnnotations(container *v1.Container, pod *v1.Pod, restartCount | ||||
| 	} | ||||
|  | ||||
| 	annotations[containerHashLabel] = strconv.FormatUint(kubecontainer.HashContainer(container), 16) | ||||
| 	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { | ||||
| 		annotations[containerHashWithoutResourcesLabel] = strconv.FormatUint(kubecontainer.HashContainerWithoutResources(container), 16) | ||||
| 	} | ||||
| 	annotations[containerRestartCountLabel] = strconv.Itoa(restartCount) | ||||
| 	annotations[containerTerminationMessagePathLabel] = container.TerminationMessagePath | ||||
| 	annotations[containerTerminationMessagePolicyLabel] = string(container.TerminationMessagePolicy) | ||||
| @@ -193,6 +200,11 @@ func getContainerInfoFromAnnotations(annotations map[string]string) *annotatedCo | ||||
| 	if containerInfo.Hash, err = getUint64ValueFromLabel(annotations, containerHashLabel); err != nil { | ||||
| 		klog.ErrorS(err, "Unable to get label value from annotations", "label", containerHashLabel, "annotations", annotations) | ||||
| 	} | ||||
| 	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { | ||||
| 		if containerInfo.HashWithoutResources, err = getUint64ValueFromLabel(annotations, containerHashWithoutResourcesLabel); err != nil { | ||||
| 			klog.ErrorS(err, "Unable to get label value from annotations", "label", containerHashWithoutResourcesLabel, "annotations", annotations) | ||||
| 		} | ||||
| 	} | ||||
| 	if containerInfo.RestartCount, err = getIntValueFromLabel(annotations, containerRestartCountLabel); err != nil { | ||||
| 		klog.ErrorS(err, "Unable to get label value from annotations", "label", containerRestartCountLabel, "annotations", annotations) | ||||
| 	} | ||||
|   | ||||
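The new containerHashWithoutResources annotation exists so that changing only a container's Resources (or toggling the feature gate) is not treated as a spec change that forces a restart: the full hash changes, but the hash computed without the Resources field does not. A tiny standalone sketch of that idea, with hashAll and hashWithoutResources as toy stand-ins for the real hashing helpers:

    package main

    import (
    	"fmt"
    	"hash/fnv"
    )

    // A toy container with one non-resource field and one resource field.
    type toyContainer struct {
    	image    string
    	cpuMilli int64
    }

    func hashAll(c toyContainer) uint64 {
    	h := fnv.New64a()
    	fmt.Fprintf(h, "%s/%d", c.image, c.cpuMilli)
    	return h.Sum64()
    }

    func hashWithoutResources(c toyContainer) uint64 {
    	h := fnv.New64a()
    	fmt.Fprint(h, c.image)
    	return h.Sum64()
    }

    func main() {
    	before := toyContainer{image: "busybox", cpuMilli: 100}
    	after := toyContainer{image: "busybox", cpuMilli: 200} // only the resource field changed
    	fmt.Println(hashAll(before) == hashAll(after))                           // false: full hash differs
    	fmt.Println(hashWithoutResources(before) == hashWithoutResources(after)) // true: no restart needed
    }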
| @@ -23,6 +23,9 @@ import ( | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/apimachinery/pkg/util/intstr" | ||||
| 	utilfeature "k8s.io/apiserver/pkg/util/feature" | ||||
| 	featuregatetesting "k8s.io/component-base/featuregate/testing" | ||||
| 	"k8s.io/kubernetes/pkg/features" | ||||
| 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" | ||||
| ) | ||||
|  | ||||
| @@ -152,11 +155,13 @@ func TestContainerAnnotations(t *testing.T) { | ||||
| 		PodDeletionGracePeriod:    pod.DeletionGracePeriodSeconds, | ||||
| 		PodTerminationGracePeriod: pod.Spec.TerminationGracePeriodSeconds, | ||||
| 		Hash:                      kubecontainer.HashContainer(container), | ||||
| 		HashWithoutResources:      kubecontainer.HashContainerWithoutResources(container), | ||||
| 		RestartCount:              restartCount, | ||||
| 		TerminationMessagePath:    container.TerminationMessagePath, | ||||
| 		PreStopHandler:            container.Lifecycle.PreStop, | ||||
| 	} | ||||
|  | ||||
| 	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)() | ||||
| 	// Test whether we can get right information from label | ||||
| 	annotations := newContainerAnnotations(container, pod, restartCount, opts) | ||||
| 	containerInfo := getContainerInfoFromAnnotations(annotations) | ||||
| @@ -177,6 +182,7 @@ func TestContainerAnnotations(t *testing.T) { | ||||
| 	expected.PreStopHandler = nil | ||||
| 	// Because container is changed, the Hash should be updated | ||||
| 	expected.Hash = kubecontainer.HashContainer(container) | ||||
| 	expected.HashWithoutResources = kubecontainer.HashContainerWithoutResources(container) | ||||
| 	annotations = newContainerAnnotations(container, pod, restartCount, opts) | ||||
| 	containerInfo = getContainerInfoFromAnnotations(annotations) | ||||
| 	if !reflect.DeepEqual(containerInfo, expected) { | ||||
|   | ||||
| @@ -76,6 +76,8 @@ type GenericPLEG struct { | ||||
| 	runningMu sync.Mutex | ||||
| 	// Indicates relisting related parameters | ||||
| 	relistDuration *RelistDuration | ||||
| 	// podCacheMutex serializes updateCache calls made during relist with calls via the UpdateCache interface method | ||||
| 	podCacheMutex sync.Mutex | ||||
| } | ||||
|  | ||||
| // plegContainerState has a one-to-one mapping to the | ||||
| @@ -436,6 +438,8 @@ func (g *GenericPLEG) updateCache(ctx context.Context, pod *kubecontainer.Pod, p | ||||
| 		return nil, true | ||||
| 	} | ||||
|  | ||||
| 	g.podCacheMutex.Lock() | ||||
| 	defer g.podCacheMutex.Unlock() | ||||
| 	timestamp := g.clock.Now() | ||||
|  | ||||
| 	status, err := g.runtime.GetPodStatus(ctx, pod.ID, pod.Name, pod.Namespace) | ||||
| @@ -478,6 +482,16 @@ func (g *GenericPLEG) updateCache(ctx context.Context, pod *kubecontainer.Pod, p | ||||
| 	return err, g.cache.Set(pod.ID, status, err, timestamp) | ||||
| } | ||||
|  | ||||
| func (g *GenericPLEG) UpdateCache(pod *kubecontainer.Pod, pid types.UID) error { | ||||
| 	if !g.cacheEnabled() { | ||||
| 		return fmt.Errorf("pod cache disabled") | ||||
| 	} | ||||
| 	if pod == nil { | ||||
| 		return fmt.Errorf("pod cannot be nil") | ||||
| 	} | ||||
| 	return g.updateCache(pod, pid) | ||||
| } | ||||
|  | ||||
| func updateEvents(eventsByPodID map[types.UID][]*PodLifecycleEvent, e *PodLifecycleEvent) { | ||||
| 	if e == nil { | ||||
| 		return | ||||
|   | ||||
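The new podCacheMutex is needed because the pod cache can now be written from two paths: the periodic relist and the new on-demand UpdateCache call (used, for example, right after a resize). Below is a generic sketch of that pattern, deliberately unrelated to the kubelet's actual types, showing why the shared write needs serialization.

    package main

    import (
    	"fmt"
    	"sync"
    )

    // cache is written both by a periodic refresher and by on-demand updates,
    // so a mutex serializes the read-modify-write of each entry.
    type cache struct {
    	mu      sync.Mutex
    	entries map[string]string
    }

    func (c *cache) update(key, value string) {
    	c.mu.Lock()
    	defer c.mu.Unlock()
    	c.entries[key] = value
    }

    func main() {
    	c := &cache{entries: map[string]string{}}
    	var wg sync.WaitGroup
    	for i := 0; i < 2; i++ {
    		wg.Add(1)
    		go func(i int) { // one goroutine stands in for relist, the other for UpdateCache
    			defer wg.Done()
    			c.update("pod-1", fmt.Sprintf("status-%d", i))
    		}(i)
    	}
    	wg.Wait()
    	fmt.Println(c.entries["pod-1"])
    }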
| @@ -20,6 +20,7 @@ import ( | ||||
| 	"time" | ||||
|  | ||||
| 	"k8s.io/apimachinery/pkg/types" | ||||
| 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" | ||||
| ) | ||||
|  | ||||
| // PodLifeCycleEventType define the event type of pod life cycle events. | ||||
| @@ -68,4 +69,5 @@ type PodLifecycleEventGenerator interface { | ||||
| 	Watch() chan *PodLifecycleEvent | ||||
| 	Healthy() (bool, error) | ||||
| 	Relist() | ||||
| 	UpdateCache(*kubecontainer.Pod, types.UID) error | ||||
| } | ||||
|   | ||||
| @@ -17,6 +17,7 @@ limitations under the License. | ||||
| package prober | ||||
|  | ||||
| import ( | ||||
| 	"io/ioutil" | ||||
| 	"reflect" | ||||
| 	"sync" | ||||
|  | ||||
| @@ -109,8 +110,14 @@ func newTestManager() *manager { | ||||
| 	podStartupLatencyTracker := kubeletutil.NewPodStartupLatencyTracker() | ||||
| 	// Add test pod to pod manager, so that status manager can get the pod from pod manager if needed. | ||||
| 	podManager.AddPod(getTestPod()) | ||||
| 	testRootDir := "" | ||||
| 	if tempDir, err := ioutil.TempDir("", "kubelet_test."); err != nil { | ||||
| 		return nil | ||||
| 	} else { | ||||
| 		testRootDir = tempDir | ||||
| 	} | ||||
| 	m := NewManager( | ||||
| 		status.NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker), | ||||
| 		status.NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, testRootDir), | ||||
| 		results.NewManager(), | ||||
| 		results.NewManager(), | ||||
| 		results.NewManager(), | ||||
|   | ||||
| @@ -19,6 +19,7 @@ package prober | ||||
| import ( | ||||
| 	"context" | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 	"testing" | ||||
| 	"time" | ||||
|  | ||||
| @@ -153,7 +154,13 @@ func TestDoProbe(t *testing.T) { | ||||
| 			} | ||||
|  | ||||
| 			// Clean up. | ||||
| 			m.statusManager = status.NewManager(&fake.Clientset{}, kubepod.NewBasicPodManager(nil), &statustest.FakePodDeletionSafetyProvider{}, kubeletutil.NewPodStartupLatencyTracker()) | ||||
| 			testRootDir := "" | ||||
| 			if tempDir, err := ioutil.TempDir("", "kubelet_test."); err != nil { | ||||
| 				t.Fatalf("can't make a temp rootdir: %v", err) | ||||
| 			} else { | ||||
| 				testRootDir = tempDir | ||||
| 			} | ||||
| 			m.statusManager = status.NewManager(&fake.Clientset{}, kubepod.NewBasicPodManager(nil), &statustest.FakePodDeletionSafetyProvider{}, kubeletutil.NewPodStartupLatencyTracker(), testRootDir) | ||||
| 			resultsManager(m, probeType).Remove(testContainerID) | ||||
| 		} | ||||
| 	} | ||||
|   | ||||
| @@ -18,7 +18,10 @@ package qos | ||||
|  | ||||
| import ( | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	utilfeature "k8s.io/apiserver/pkg/util/feature" | ||||
| 	podutil "k8s.io/kubernetes/pkg/api/v1/pod" | ||||
| 	v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" | ||||
| 	"k8s.io/kubernetes/pkg/features" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/types" | ||||
| ) | ||||
|  | ||||
| @@ -60,6 +63,11 @@ func GetContainerOOMScoreAdjust(pod *v1.Pod, container *v1.Container, memoryCapa | ||||
| 	// targets for OOM kills. | ||||
| 	// Note that this is a heuristic, it won't work if a container has many small processes. | ||||
| 	memoryRequest := container.Resources.Requests.Memory().Value() | ||||
| 	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { | ||||
| 		if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok { | ||||
| 			memoryRequest = cs.ResourcesAllocated.Memory().Value() | ||||
| 		} | ||||
| 	} | ||||
| 	oomScoreAdjust := 1000 - (1000*memoryRequest)/memoryCapacity | ||||
| 	// A guaranteed pod using 100% of memory can have an OOM score of 10. Ensure | ||||
| 	// that burstable pods have a higher OOM score adjustment. | ||||
|   | ||||
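To show the effect of the qos change above in numbers, here is a small worked example of the oomScoreAdjust formula when the allocated request differs from the spec request on a 16 GiB node; the values are chosen purely for illustration, and the integer division matches the Go expression in the source.

    package main

    import "fmt"

    func main() {
    	memoryCapacity := int64(16) << 30    // 16 GiB node
    	specRequest := int64(100) << 20      // 100Mi currently in the pod spec (desired after a resize)
    	allocatedRequest := int64(200) << 20 // 200Mi most recently accepted by the kubelet

    	// oomScoreAdjust = 1000 - (1000*memoryRequest)/memoryCapacity
    	fmt.Println(1000 - (1000*specRequest)/memoryCapacity)      // 994 if the spec request were used
    	fmt.Println(1000 - (1000*allocatedRequest)/memoryCapacity) // 988 using ResourcesAllocated
    }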
| @@ -85,7 +85,7 @@ func TestRunOnce(t *testing.T) { | ||||
| 		recorder:         &record.FakeRecorder{}, | ||||
| 		cadvisor:         cadvisor, | ||||
| 		nodeLister:       testNodeLister{}, | ||||
| 		statusManager:    status.NewManager(nil, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker), | ||||
| 		statusManager:    status.NewManager(nil, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, basePath), | ||||
| 		podManager:       podManager, | ||||
| 		podWorkers:       &fakePodWorkers{}, | ||||
| 		os:               &containertest.FakeOS{}, | ||||
|   | ||||
							
								
								
									
pkg/kubelet/status/fake_status_manager.go (new file, 93 lines)
							| @@ -0,0 +1,93 @@ | ||||
| /* | ||||
| Copyright 2021 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package status | ||||
|  | ||||
| import ( | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	"k8s.io/apimachinery/pkg/types" | ||||
| 	"k8s.io/klog/v2" | ||||
| 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/status/state" | ||||
| ) | ||||
|  | ||||
| type fakeManager struct { | ||||
| 	state state.State | ||||
| } | ||||
|  | ||||
| func (m *fakeManager) Start() { | ||||
| 	klog.InfoS("Start()") | ||||
| 	return | ||||
| } | ||||
|  | ||||
| func (m *fakeManager) GetPodStatus(uid types.UID) (v1.PodStatus, bool) { | ||||
| 	klog.InfoS("GetPodStatus()") | ||||
| 	return v1.PodStatus{}, false | ||||
| } | ||||
|  | ||||
| func (m *fakeManager) SetPodStatus(pod *v1.Pod, status v1.PodStatus) { | ||||
| 	klog.InfoS("SetPodStatus()") | ||||
| 	return | ||||
| } | ||||
|  | ||||
| func (m *fakeManager) SetContainerReadiness(podUID types.UID, containerID kubecontainer.ContainerID, ready bool) { | ||||
| 	klog.InfoS("SetContainerReadiness()") | ||||
| 	return | ||||
| } | ||||
|  | ||||
| func (m *fakeManager) SetContainerStartup(podUID types.UID, containerID kubecontainer.ContainerID, started bool) { | ||||
| 	klog.InfoS("SetContainerStartup()") | ||||
| 	return | ||||
| } | ||||
|  | ||||
| func (m *fakeManager) TerminatePod(pod *v1.Pod) { | ||||
| 	klog.InfoS("TerminatePod()") | ||||
| 	return | ||||
| } | ||||
|  | ||||
| func (m *fakeManager) RemoveOrphanedStatuses(podUIDs map[types.UID]bool) { | ||||
| 	klog.InfoS("RemoveOrphanedStatuses()") | ||||
| 	return | ||||
| } | ||||
|  | ||||
| func (m *fakeManager) State() state.Reader { | ||||
| 	klog.InfoS("State()") | ||||
| 	return m.state | ||||
| } | ||||
|  | ||||
| func (m *fakeManager) SetPodAllocation(pod *v1.Pod) error { | ||||
| 	klog.InfoS("SetPodAllocation()") | ||||
| 	for _, container := range pod.Spec.Containers { | ||||
| 		var alloc v1.ResourceList | ||||
| 		if container.Resources.Requests != nil { | ||||
| 			alloc = container.Resources.Requests.DeepCopy() | ||||
| 		} | ||||
| 		m.state.SetContainerResourceAllocation(string(pod.UID), container.Name, alloc) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (m *fakeManager) SetPodResizeStatus(podUID types.UID, resizeStatus v1.PodResizeStatus) error { | ||||
| 	klog.InfoS("SetPodResizeStatus()") | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // NewFakeManager creates a fake status Manager backed by an in-memory state store | ||||
| func NewFakeManager() Manager { | ||||
| 	return &fakeManager{ | ||||
| 		state: state.NewStateMemory(), | ||||
| 	} | ||||
| } | ||||
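A minimal sketch of how the fake manager above might be exercised from a test; the pod shape and resource values here are illustrative, not taken from the commit:

package status_test

import (
	"testing"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/kubernetes/pkg/kubelet/status"
)

// Record a pod's spec'd requests through the fake manager and read them back
// through the State() reader, mirroring how callers use the real manager.
func TestFakeManagerAllocation(t *testing.T) {
	m := status.NewFakeManager()
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{UID: "pod-uid-1", Name: "p"},
		Spec: v1.PodSpec{
			Containers: []v1.Container{{
				Name: "c1",
				Resources: v1.ResourceRequirements{
					Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m")},
				},
			}},
		},
	}
	if err := m.SetPodAllocation(pod); err != nil {
		t.Fatal(err)
	}
	if alloc, ok := m.State().GetContainerResourceAllocation("pod-uid-1", "c1"); !ok || alloc.Cpu().MilliValue() != 500 {
		t.Fatalf("unexpected allocation: %v", alloc)
	}
}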
pkg/kubelet/status/state/checkpoint.go (new file, 65 lines)
| @@ -0,0 +1,65 @@ | ||||
| /* | ||||
| Copyright 2021 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package state | ||||
|  | ||||
| import ( | ||||
| 	"encoding/json" | ||||
|  | ||||
| 	"k8s.io/api/core/v1" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum" | ||||
| ) | ||||
|  | ||||
| var _ checkpointmanager.Checkpoint = &PodResourceAllocationCheckpoint{} | ||||
|  | ||||
| // PodResourceAllocationCheckpoint is used to store resources allocated to a pod in checkpoint | ||||
| type PodResourceAllocationCheckpoint struct { | ||||
| 	AllocationEntries   map[string]map[string]v1.ResourceList `json:"allocationEntries,omitempty"` | ||||
| 	ResizeStatusEntries map[string]v1.PodResizeStatus         `json:"resizeStatusEntries,omitempty"` | ||||
| 	Checksum            checksum.Checksum                     `json:"checksum"` | ||||
| } | ||||
|  | ||||
| // NewPodResourceAllocationCheckpoint returns an instance of Checkpoint | ||||
| func NewPodResourceAllocationCheckpoint() *PodResourceAllocationCheckpoint { | ||||
| 	//lint:ignore unexported-type-in-api user-facing error message | ||||
| 	return &PodResourceAllocationCheckpoint{ | ||||
| 		AllocationEntries:   make(map[string]map[string]v1.ResourceList), | ||||
| 		ResizeStatusEntries: make(map[string]v1.PodResizeStatus), | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // MarshalCheckpoint returns marshalled checkpoint | ||||
| func (prc *PodResourceAllocationCheckpoint) MarshalCheckpoint() ([]byte, error) { | ||||
| 	// make sure checksum wasn't set before so it doesn't affect output checksum | ||||
| 	prc.Checksum = 0 | ||||
| 	prc.Checksum = checksum.New(prc) | ||||
| 	return json.Marshal(*prc) | ||||
| } | ||||
|  | ||||
| // UnmarshalCheckpoint tries to unmarshal passed bytes to checkpoint | ||||
| func (prc *PodResourceAllocationCheckpoint) UnmarshalCheckpoint(blob []byte) error { | ||||
| 	return json.Unmarshal(blob, prc) | ||||
| } | ||||
|  | ||||
| // VerifyChecksum verifies that current checksum of checkpoint is valid | ||||
| func (prc *PodResourceAllocationCheckpoint) VerifyChecksum() error { | ||||
| 	ck := prc.Checksum | ||||
| 	prc.Checksum = 0 | ||||
| 	err := ck.Verify(prc) | ||||
| 	prc.Checksum = ck | ||||
| 	return err | ||||
| } | ||||
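MarshalCheckpoint zeroes the Checksum field before hashing so the stored checksum never feeds into itself, and VerifyChecksum repeats that dance on the restored struct. A minimal round-trip sketch, assuming this package and made-up data:

package state_test

import (
	"testing"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/kubernetes/pkg/kubelet/status/state"
)

// Marshal a checkpoint, unmarshal it into a fresh struct, and verify that the
// embedded checksum still matches the content.
func TestCheckpointRoundTrip(t *testing.T) {
	cp := state.NewPodResourceAllocationCheckpoint()
	cp.AllocationEntries["pod-uid"] = map[string]v1.ResourceList{
		"c1": {v1.ResourceMemory: resource.MustParse("128Mi")},
	}

	blob, err := cp.MarshalCheckpoint() // sets cp.Checksum as a side effect
	if err != nil {
		t.Fatal(err)
	}

	restored := state.NewPodResourceAllocationCheckpoint()
	if err := restored.UnmarshalCheckpoint(blob); err != nil {
		t.Fatal(err)
	}
	if err := restored.VerifyChecksum(); err != nil {
		t.Fatalf("checksum mismatch: %v", err)
	}
}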
pkg/kubelet/status/state/state.go (new file, 62 lines)
| @@ -0,0 +1,62 @@ | ||||
| /* | ||||
| Copyright 2021 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package state | ||||
|  | ||||
| import ( | ||||
| 	"k8s.io/api/core/v1" | ||||
| ) | ||||
|  | ||||
| // PodResourceAllocation type is used in tracking resources allocated to pod's containers | ||||
| type PodResourceAllocation map[string]map[string]v1.ResourceList | ||||
|  | ||||
| // PodResizeStatus type is used in tracking the last resize decision for pod | ||||
| type PodResizeStatus map[string]v1.PodResizeStatus | ||||
|  | ||||
| // Clone returns a copy of PodResourceAllocation | ||||
| func (pr PodResourceAllocation) Clone() PodResourceAllocation { | ||||
| 	prCopy := make(PodResourceAllocation) | ||||
| 	for pod := range pr { | ||||
| 		prCopy[pod] = make(map[string]v1.ResourceList) | ||||
| 		for container, alloc := range pr[pod] { | ||||
| 			prCopy[pod][container] = alloc.DeepCopy() | ||||
| 		} | ||||
| 	} | ||||
| 	return prCopy | ||||
| } | ||||
|  | ||||
| // Reader interface used to read current pod resource allocation state | ||||
| type Reader interface { | ||||
| 	GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceList, bool) | ||||
| 	GetPodResourceAllocation() PodResourceAllocation | ||||
| 	GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool) | ||||
| 	GetResizeStatus() PodResizeStatus | ||||
| } | ||||
|  | ||||
| type writer interface { | ||||
| 	SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceList) error | ||||
| 	SetPodResourceAllocation(PodResourceAllocation) error | ||||
| 	SetPodResizeStatus(podUID string, resizeStatus v1.PodResizeStatus) error | ||||
| 	SetResizeStatus(PodResizeStatus) error | ||||
| 	Delete(podUID string, containerName string) error | ||||
| 	ClearState() error | ||||
| } | ||||
|  | ||||
| // State interface provides methods for tracking and setting pod resource allocation | ||||
| type State interface { | ||||
| 	Reader | ||||
| 	writer | ||||
| } | ||||
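Because the writer interface is unexported, external packages consume the store through State (returned by the constructors) or the read-only Reader. A minimal sketch written against Reader, using the in-memory backend and illustrative values:

package state_test

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/kubernetes/pkg/kubelet/status/state"
)

// Code written against the Reader interface works with either backend; here
// the in-memory store stands in for the checkpoint-backed one.
func dumpAllocations(r state.Reader) {
	for podUID, containers := range r.GetPodResourceAllocation() {
		for name, alloc := range containers {
			fmt.Printf("%s/%s => cpu=%s\n", podUID, name, alloc.Cpu())
		}
	}
}

func Example() {
	s := state.NewStateMemory()
	_ = s.SetContainerResourceAllocation("pod-uid", "c1",
		v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m")})
	dumpAllocations(s)
	// Output: pod-uid/c1 => cpu=250m
}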
pkg/kubelet/status/state/state_checkpoint.go (new file, 179 lines)
| @@ -0,0 +1,179 @@ | ||||
| /* | ||||
| Copyright 2021 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package state | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"path" | ||||
| 	"sync" | ||||
|  | ||||
| 	"k8s.io/api/core/v1" | ||||
| 	"k8s.io/klog/v2" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors" | ||||
| ) | ||||
|  | ||||
| var _ State = &stateCheckpoint{} | ||||
|  | ||||
| type stateCheckpoint struct { | ||||
| 	mux               sync.RWMutex | ||||
| 	cache             State | ||||
| 	checkpointManager checkpointmanager.CheckpointManager | ||||
| 	checkpointName    string | ||||
| } | ||||
|  | ||||
| // NewStateCheckpoint creates new State for keeping track of pod resource allocations with checkpoint backend | ||||
| func NewStateCheckpoint(stateDir, checkpointName string) (State, error) { | ||||
| 	checkpointManager, err := checkpointmanager.NewCheckpointManager(stateDir) | ||||
| 	if err != nil { | ||||
| 		return nil, fmt.Errorf("failed to initialize checkpoint manager for pod allocation tracking: %v", err) | ||||
| 	} | ||||
| 	stateCheckpoint := &stateCheckpoint{ | ||||
| 		cache:             NewStateMemory(), | ||||
| 		checkpointManager: checkpointManager, | ||||
| 		checkpointName:    checkpointName, | ||||
| 	} | ||||
|  | ||||
| 	if err := stateCheckpoint.restoreState(); err != nil { | ||||
| 		//lint:ignore ST1005 user-facing error message | ||||
| 		return nil, fmt.Errorf("could not restore state from checkpoint: %v, please drain this node and delete pod allocation checkpoint file %q before restarting Kubelet", err, path.Join(stateDir, checkpointName)) | ||||
| 	} | ||||
| 	return stateCheckpoint, nil | ||||
| } | ||||
|  | ||||
| // restores state from a checkpoint and creates it if it doesn't exist | ||||
| func (sc *stateCheckpoint) restoreState() error { | ||||
| 	sc.mux.Lock() | ||||
| 	defer sc.mux.Unlock() | ||||
| 	var err error | ||||
|  | ||||
| 	checkpoint := NewPodResourceAllocationCheckpoint() | ||||
|  | ||||
| 	if err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpoint); err != nil { | ||||
| 		if err == errors.ErrCheckpointNotFound { | ||||
| 			return sc.storeState() | ||||
| 		} | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	sc.cache.SetPodResourceAllocation(checkpoint.AllocationEntries) | ||||
| 	sc.cache.SetResizeStatus(checkpoint.ResizeStatusEntries) | ||||
| 	klog.V(2).InfoS("State checkpoint: restored pod resource allocation state from checkpoint") | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // saves state to a checkpoint, caller is responsible for locking | ||||
| func (sc *stateCheckpoint) storeState() error { | ||||
| 	checkpoint := NewPodResourceAllocationCheckpoint() | ||||
|  | ||||
| 	podAllocation := sc.cache.GetPodResourceAllocation() | ||||
| 	for pod := range podAllocation { | ||||
| 		checkpoint.AllocationEntries[pod] = make(map[string]v1.ResourceList) | ||||
| 		for container, alloc := range podAllocation[pod] { | ||||
| 			checkpoint.AllocationEntries[pod][container] = alloc | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	podResizeStatus := sc.cache.GetResizeStatus() | ||||
| 	checkpoint.ResizeStatusEntries = make(map[string]v1.PodResizeStatus) | ||||
| 	for pUID, rStatus := range podResizeStatus { | ||||
| 		checkpoint.ResizeStatusEntries[pUID] = rStatus | ||||
| 	} | ||||
|  | ||||
| 	err := sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint) | ||||
| 	if err != nil { | ||||
| 		klog.ErrorS(err, "Failed to save pod allocation checkpoint") | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // GetContainerResourceAllocation returns current resources allocated to a pod's container | ||||
| func (sc *stateCheckpoint) GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceList, bool) { | ||||
| 	sc.mux.RLock() | ||||
| 	defer sc.mux.RUnlock() | ||||
| 	return sc.cache.GetContainerResourceAllocation(podUID, containerName) | ||||
| } | ||||
|  | ||||
| // GetPodResourceAllocation returns current pod resource allocation | ||||
| func (sc *stateCheckpoint) GetPodResourceAllocation() PodResourceAllocation { | ||||
| 	sc.mux.RLock() | ||||
| 	defer sc.mux.RUnlock() | ||||
| 	return sc.cache.GetPodResourceAllocation() | ||||
| } | ||||
|  | ||||
| // GetPodResizeStatus returns the last resize decision for a pod | ||||
| func (sc *stateCheckpoint) GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool) { | ||||
| 	sc.mux.RLock() | ||||
| 	defer sc.mux.RUnlock() | ||||
| 	return sc.cache.GetPodResizeStatus(podUID) | ||||
| } | ||||
|  | ||||
| // GetResizeStatus returns the set of resize decisions made | ||||
| func (sc *stateCheckpoint) GetResizeStatus() PodResizeStatus { | ||||
| 	sc.mux.RLock() | ||||
| 	defer sc.mux.RUnlock() | ||||
| 	return sc.cache.GetResizeStatus() | ||||
| } | ||||
|  | ||||
| // SetContainerResourceAllocation sets resources allocated to a pod's container | ||||
| func (sc *stateCheckpoint) SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceList) error { | ||||
| 	sc.mux.Lock() | ||||
| 	defer sc.mux.Unlock() | ||||
| 	sc.cache.SetContainerResourceAllocation(podUID, containerName, alloc) | ||||
| 	return sc.storeState() | ||||
| } | ||||
|  | ||||
| // SetPodResourceAllocation sets pod resource allocation | ||||
| func (sc *stateCheckpoint) SetPodResourceAllocation(a PodResourceAllocation) error { | ||||
| 	sc.mux.Lock() | ||||
| 	defer sc.mux.Unlock() | ||||
| 	sc.cache.SetPodResourceAllocation(a) | ||||
| 	return sc.storeState() | ||||
| } | ||||
|  | ||||
| // SetPodResizeStatus sets the last resize decision for a pod | ||||
| func (sc *stateCheckpoint) SetPodResizeStatus(podUID string, resizeStatus v1.PodResizeStatus) error { | ||||
| 	sc.mux.Lock() | ||||
| 	defer sc.mux.Unlock() | ||||
| 	sc.cache.SetPodResizeStatus(podUID, resizeStatus) | ||||
| 	return sc.storeState() | ||||
| } | ||||
|  | ||||
| // SetResizeStatus sets the resize decisions | ||||
| func (sc *stateCheckpoint) SetResizeStatus(rs PodResizeStatus) error { | ||||
| 	sc.mux.Lock() | ||||
| 	defer sc.mux.Unlock() | ||||
| 	sc.cache.SetResizeStatus(rs) | ||||
| 	return sc.storeState() | ||||
| } | ||||
|  | ||||
| // Delete deletes allocations for specified pod | ||||
| func (sc *stateCheckpoint) Delete(podUID string, containerName string) error { | ||||
| 	sc.mux.Lock() | ||||
| 	defer sc.mux.Unlock() | ||||
| 	sc.cache.Delete(podUID, containerName) | ||||
| 	return sc.storeState() | ||||
| } | ||||
|  | ||||
| // ClearState clears the state and saves it in a checkpoint | ||||
| func (sc *stateCheckpoint) ClearState() error { | ||||
| 	sc.mux.Lock() | ||||
| 	defer sc.mux.Unlock() | ||||
| 	sc.cache.ClearState() | ||||
| 	return sc.storeState() | ||||
| } | ||||
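The point of the checkpoint-backed State is that allocations survive a kubelet restart. A minimal sketch of that behavior, assuming a throwaway directory and an arbitrary checkpoint name:

package state_test

import (
	"testing"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/kubernetes/pkg/kubelet/status/state"
)

// Allocations written through the checkpoint-backed State survive re-creation
// of the store over the same directory, mimicking a kubelet restart.
func TestCheckpointSurvivesReopen(t *testing.T) {
	dir := t.TempDir()

	s1, err := state.NewStateCheckpoint(dir, "pod_status_manager_state")
	if err != nil {
		t.Fatal(err)
	}
	if err := s1.SetContainerResourceAllocation("pod-uid", "c1",
		v1.ResourceList{v1.ResourceMemory: resource.MustParse("64Mi")}); err != nil {
		t.Fatal(err)
	}

	// "Restart": build a fresh store over the same directory and read back.
	s2, err := state.NewStateCheckpoint(dir, "pod_status_manager_state")
	if err != nil {
		t.Fatal(err)
	}
	if alloc, ok := s2.GetContainerResourceAllocation("pod-uid", "c1"); !ok || alloc.Memory().Cmp(resource.MustParse("64Mi")) != 0 {
		t.Fatalf("allocation not restored: %v", alloc)
	}
}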
pkg/kubelet/status/state/state_mem.go (new file, 152 lines)
| @@ -0,0 +1,152 @@ | ||||
| /* | ||||
| Copyright 2021 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package state | ||||
|  | ||||
| import ( | ||||
| 	"sync" | ||||
|  | ||||
| 	"k8s.io/api/core/v1" | ||||
| 	"k8s.io/klog/v2" | ||||
| ) | ||||
|  | ||||
| type stateMemory struct { | ||||
| 	sync.RWMutex | ||||
| 	podAllocation   PodResourceAllocation | ||||
| 	podResizeStatus PodResizeStatus | ||||
| } | ||||
|  | ||||
| var _ State = &stateMemory{} | ||||
|  | ||||
| // NewStateMemory creates new State to track resources allocated to pods | ||||
| func NewStateMemory() State { | ||||
| 	klog.V(2).InfoS("Initialized new in-memory state store for pod resource allocation tracking") | ||||
| 	return &stateMemory{ | ||||
| 		podAllocation:   PodResourceAllocation{}, | ||||
| 		podResizeStatus: PodResizeStatus{}, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (s *stateMemory) GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceList, bool) { | ||||
| 	s.RLock() | ||||
| 	defer s.RUnlock() | ||||
|  | ||||
| 	alloc, ok := s.podAllocation[podUID][containerName] | ||||
| 	return alloc.DeepCopy(), ok | ||||
| } | ||||
|  | ||||
| func (s *stateMemory) GetPodResourceAllocation() PodResourceAllocation { | ||||
| 	s.RLock() | ||||
| 	defer s.RUnlock() | ||||
| 	return s.podAllocation.Clone() | ||||
| } | ||||
|  | ||||
| func (s *stateMemory) GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool) { | ||||
| 	s.RLock() | ||||
| 	defer s.RUnlock() | ||||
|  | ||||
| 	resizeStatus, ok := s.podResizeStatus[podUID] | ||||
| 	return resizeStatus, ok | ||||
| } | ||||
|  | ||||
| func (s *stateMemory) GetResizeStatus() PodResizeStatus { | ||||
| 	s.RLock() | ||||
| 	defer s.RUnlock() | ||||
| 	prs := make(map[string]v1.PodResizeStatus) | ||||
| 	for k, v := range s.podResizeStatus { | ||||
| 		prs[k] = v | ||||
| 	} | ||||
| 	return prs | ||||
| } | ||||
|  | ||||
| func (s *stateMemory) SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceList) error { | ||||
| 	s.Lock() | ||||
| 	defer s.Unlock() | ||||
|  | ||||
| 	if _, ok := s.podAllocation[podUID]; !ok { | ||||
| 		s.podAllocation[podUID] = make(map[string]v1.ResourceList) | ||||
| 	} | ||||
|  | ||||
| 	s.podAllocation[podUID][containerName] = alloc | ||||
| 	klog.V(3).InfoS("Updated container resource allocation", "podUID", podUID, "containerName", containerName, "alloc", alloc) | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (s *stateMemory) SetPodResourceAllocation(a PodResourceAllocation) error { | ||||
| 	s.Lock() | ||||
| 	defer s.Unlock() | ||||
|  | ||||
| 	s.podAllocation = a.Clone() | ||||
| 	klog.V(3).InfoS("Updated pod resource allocation", "allocation", a) | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (s *stateMemory) SetPodResizeStatus(podUID string, resizeStatus v1.PodResizeStatus) error { | ||||
| 	s.Lock() | ||||
| 	defer s.Unlock() | ||||
|  | ||||
| 	if resizeStatus != "" { | ||||
| 		s.podResizeStatus[podUID] = resizeStatus | ||||
| 	} else { | ||||
| 		delete(s.podResizeStatus, podUID) | ||||
| 	} | ||||
| 	klog.V(3).InfoS("Updated pod resize state", "podUID", podUID, "resizeStatus", resizeStatus) | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (s *stateMemory) SetResizeStatus(rs PodResizeStatus) error { | ||||
| 	s.Lock() | ||||
| 	defer s.Unlock() | ||||
| 	prs := make(map[string]v1.PodResizeStatus) | ||||
| 	for k, v := range rs { | ||||
| 		prs[k] = v | ||||
| 	} | ||||
| 	s.podResizeStatus = prs | ||||
| 	klog.V(3).InfoS("Updated pod resize state", "resizes", rs) | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (s *stateMemory) deleteContainer(podUID string, containerName string) { | ||||
| 	delete(s.podAllocation[podUID], containerName) | ||||
| 	if len(s.podAllocation[podUID]) == 0 { | ||||
| 		delete(s.podAllocation, podUID) | ||||
| 		delete(s.podResizeStatus, podUID) | ||||
| 	} | ||||
| 	klog.V(3).InfoS("Deleted pod resource allocation", "podUID", podUID, "containerName", containerName) | ||||
| } | ||||
|  | ||||
| func (s *stateMemory) Delete(podUID string, containerName string) error { | ||||
| 	s.Lock() | ||||
| 	defer s.Unlock() | ||||
| 	if len(containerName) == 0 { | ||||
| 		delete(s.podAllocation, podUID) | ||||
| 		delete(s.podResizeStatus, podUID) | ||||
| 		klog.V(3).InfoS("Deleted pod resource allocation and resize state", "podUID", podUID) | ||||
| 		return nil | ||||
| 	} | ||||
| 	s.deleteContainer(podUID, containerName) | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (s *stateMemory) ClearState() error { | ||||
| 	s.Lock() | ||||
| 	defer s.Unlock() | ||||
|  | ||||
| 	s.podAllocation = make(PodResourceAllocation) | ||||
| 	s.podResizeStatus = make(PodResizeStatus) | ||||
| 	klog.V(3).InfoS("Cleared state") | ||||
| 	return nil | ||||
| } | ||||
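A small sketch of the Delete semantics above, with illustrative pod and container names: an empty container name wipes every entry for the pod, otherwise only that container is dropped, and the pod entry disappears once its last container is removed.

package state_test

import (
	"testing"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/kubernetes/pkg/kubelet/status/state"
)

// Delete with a container name drops only that container; Delete with an
// empty container name removes every entry for the pod.
func TestDeleteSemantics(t *testing.T) {
	s := state.NewStateMemory()
	_ = s.SetContainerResourceAllocation("pod-a", "c1",
		v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m")})
	_ = s.SetContainerResourceAllocation("pod-a", "c2",
		v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m")})

	_ = s.Delete("pod-a", "c1") // drops only c1
	if _, ok := s.GetContainerResourceAllocation("pod-a", "c2"); !ok {
		t.Fatal("c2 should still be tracked")
	}

	_ = s.Delete("pod-a", "") // empty container name wipes the pod
	if _, ok := s.GetContainerResourceAllocation("pod-a", "c2"); ok {
		t.Fatal("pod-a should be gone")
	}
}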
| @@ -41,10 +41,14 @@ import ( | ||||
| 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/metrics" | ||||
| 	kubepod "k8s.io/kubernetes/pkg/kubelet/pod" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/status/state" | ||||
| 	kubetypes "k8s.io/kubernetes/pkg/kubelet/types" | ||||
| 	statusutil "k8s.io/kubernetes/pkg/util/pod" | ||||
| ) | ||||
|  | ||||
| // podStatusManagerStateFile is the file name where status manager stores its state | ||||
| const podStatusManagerStateFile = "pod_status_manager_state" | ||||
|  | ||||
| // A wrapper around v1.PodStatus that includes a version to enforce that stale pod statuses are | ||||
| // not sent to the API server. | ||||
| type versionedPodStatus struct { | ||||
| @@ -79,6 +83,10 @@ type manager struct { | ||||
| 	podDeletionSafety PodDeletionSafetyProvider | ||||
|  | ||||
| 	podStartupLatencyHelper PodStartupLatencyStateHelper | ||||
| 	// state allows saving and restoring pod resource allocation, so it can survive kubelet restarts. | ||||
| 	state state.State | ||||
| 	// stateFileDirectory holds the directory where the state file for checkpoints is held. | ||||
| 	stateFileDirectory string | ||||
| } | ||||
|  | ||||
| // PodStatusProvider knows how to provide status for a pod. It's intended to be used by other components | ||||
| @@ -128,12 +136,21 @@ type Manager interface { | ||||
| 	// RemoveOrphanedStatuses scans the status cache and removes any entries for pods not included in | ||||
| 	// the provided podUIDs. | ||||
| 	RemoveOrphanedStatuses(podUIDs map[types.UID]bool) | ||||
|  | ||||
| 	// State returns a read-only interface to the internal status manager state. | ||||
| 	State() state.Reader | ||||
|  | ||||
| 	// SetPodAllocation checkpoints the resources allocated to a pod's containers. | ||||
| 	SetPodAllocation(pod *v1.Pod) error | ||||
|  | ||||
| 	// SetPodResizeStatus checkpoints the last resizing decision for the pod. | ||||
| 	SetPodResizeStatus(podUID types.UID, resize v1.PodResizeStatus) error | ||||
| } | ||||
|  | ||||
| const syncPeriod = 10 * time.Second | ||||
|  | ||||
| // NewManager returns a functional Manager. | ||||
| func NewManager(kubeClient clientset.Interface, podManager kubepod.Manager, podDeletionSafety PodDeletionSafetyProvider, podStartupLatencyHelper PodStartupLatencyStateHelper) Manager { | ||||
| func NewManager(kubeClient clientset.Interface, podManager kubepod.Manager, podDeletionSafety PodDeletionSafetyProvider, podStartupLatencyHelper PodStartupLatencyStateHelper, stateFileDirectory string) Manager { | ||||
| 	return &manager{ | ||||
| 		kubeClient:              kubeClient, | ||||
| 		podManager:              podManager, | ||||
| @@ -142,6 +159,7 @@ func NewManager(kubeClient clientset.Interface, podManager kubepod.Manager, podD | ||||
| 		apiStatusVersions:       make(map[kubetypes.MirrorPodUID]uint64), | ||||
| 		podDeletionSafety:       podDeletionSafety, | ||||
| 		podStartupLatencyHelper: podStartupLatencyHelper, | ||||
| 		stateFileDirectory:      stateFileDirectory, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| @@ -173,6 +191,15 @@ func (m *manager) Start() { | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { | ||||
| 		stateImpl, err := state.NewStateCheckpoint(m.stateFileDirectory, podStatusManagerStateFile) | ||||
| 		if err != nil { | ||||
| 			klog.ErrorS(err, "Could not initialize pod allocation checkpoint manager, please drain node and remove policy state file") | ||||
| 			return | ||||
| 		} | ||||
| 		m.state = stateImpl | ||||
| 	} | ||||
|  | ||||
| 	klog.InfoS("Starting to sync pod status with apiserver") | ||||
|  | ||||
| 	//nolint:staticcheck // SA1015 Ticker can leak since this is only called once and doesn't handle termination. | ||||
| @@ -200,6 +227,34 @@ func (m *manager) Start() { | ||||
| 	}, 0) | ||||
| } | ||||
|  | ||||
| // State returns the pod resources checkpoint state of the pod status manager | ||||
| func (m *manager) State() state.Reader { | ||||
| 	return m.state | ||||
| } | ||||
|  | ||||
| // SetPodAllocation checkpoints the resources allocated to a pod's containers | ||||
| func (m *manager) SetPodAllocation(pod *v1.Pod) error { | ||||
| 	m.podStatusesLock.RLock() | ||||
| 	defer m.podStatusesLock.RUnlock() | ||||
| 	for _, container := range pod.Spec.Containers { | ||||
| 		var alloc v1.ResourceList | ||||
| 		if container.Resources.Requests != nil { | ||||
| 			alloc = container.Resources.Requests.DeepCopy() | ||||
| 		} | ||||
| 		if err := m.state.SetContainerResourceAllocation(string(pod.UID), container.Name, alloc); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // SetPodResizeStatus checkpoints the last resizing decision for the pod. | ||||
| func (m *manager) SetPodResizeStatus(podUID types.UID, resizeStatus v1.PodResizeStatus) error { | ||||
| 	m.podStatusesLock.RLock() | ||||
| 	defer m.podStatusesLock.RUnlock() | ||||
| 	return m.state.SetPodResizeStatus(string(podUID), resizeStatus) | ||||
| } | ||||
|  | ||||
| func (m *manager) GetPodStatus(uid types.UID) (v1.PodStatus, bool) { | ||||
| 	m.podStatusesLock.RLock() | ||||
| 	defer m.podStatusesLock.RUnlock() | ||||
| @@ -616,6 +671,9 @@ func (m *manager) deletePodStatus(uid types.UID) { | ||||
| 	defer m.podStatusesLock.Unlock() | ||||
| 	delete(m.podStatuses, uid) | ||||
| 	m.podStartupLatencyHelper.DeletePodStartupState(uid) | ||||
| 	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { | ||||
| 		m.state.Delete(string(uid), "") | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // TODO(filipg): It'd be cleaner if we can do this without signal from user. | ||||
| @@ -626,6 +684,9 @@ func (m *manager) RemoveOrphanedStatuses(podUIDs map[types.UID]bool) { | ||||
| 		if _, ok := podUIDs[key]; !ok { | ||||
| 			klog.V(5).InfoS("Removing pod from status map.", "podUID", key) | ||||
| 			delete(m.podStatuses, key) | ||||
| 			if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { | ||||
| 				m.state.Delete(string(key), "") | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|   | ||||
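A minimal sketch of how a caller holding the status Manager might use the two new methods together: checkpoint what the kubelet decided to grant, then read it back through the State() reader. The function and package below are hypothetical wiring, not part of the commit, and assume the pod has at least one container:

package podresize

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/kubernetes/pkg/kubelet/status"
)

// recordAndLookup checkpoints the resources the kubelet decided to grant a
// pod and then reads back what was allocated to its first container.
func recordAndLookup(sm status.Manager, pod *v1.Pod) (v1.ResourceList, bool) {
	if err := sm.SetPodAllocation(pod); err != nil {
		return nil, false
	}
	return sm.State().GetContainerResourceAllocation(string(pod.UID), pod.Spec.Containers[0].Name)
}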
| @@ -18,6 +18,7 @@ package status | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 	"math/rand" | ||||
| 	"reflect" | ||||
| 	"strconv" | ||||
| @@ -87,7 +88,13 @@ func newTestManager(kubeClient clientset.Interface) *manager { | ||||
| 	podManager := kubepod.NewBasicPodManager(podtest.NewFakeMirrorClient()) | ||||
| 	podManager.AddPod(getTestPod()) | ||||
| 	podStartupLatencyTracker := util.NewPodStartupLatencyTracker() | ||||
| 	return NewManager(kubeClient, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker).(*manager) | ||||
| 	testRootDir := "" | ||||
| 	if tempDir, err := ioutil.TempDir("", "kubelet_test."); err != nil { | ||||
| 		return nil | ||||
| 	} else { | ||||
| 		testRootDir = tempDir | ||||
| 	} | ||||
| 	return NewManager(kubeClient, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, testRootDir).(*manager) | ||||
| } | ||||
|  | ||||
| func generateRandomMessage() string { | ||||
| @@ -962,7 +969,7 @@ func TestTerminatePod_DefaultUnknownStatus(t *testing.T) { | ||||
| 		t.Run(tc.name, func(t *testing.T) { | ||||
| 			podManager := kubepod.NewBasicPodManager(podtest.NewFakeMirrorClient()) | ||||
| 			podStartupLatencyTracker := util.NewPodStartupLatencyTracker() | ||||
| 			syncer := NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker).(*manager) | ||||
| 			syncer := NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, "").(*manager) | ||||
|  | ||||
| 			original := tc.pod.DeepCopy() | ||||
| 			syncer.SetPodStatus(original, original.Status) | ||||
|   | ||||
| @@ -27,6 +27,7 @@ import ( | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	types "k8s.io/apimachinery/pkg/types" | ||||
| 	container "k8s.io/kubernetes/pkg/kubelet/container" | ||||
| 	state "k8s.io/kubernetes/pkg/kubelet/status/state" | ||||
| ) | ||||
|  | ||||
| // MockPodStatusProvider is a mock of PodStatusProvider interface. | ||||
| @@ -239,6 +240,34 @@ func (mr *MockManagerMockRecorder) SetContainerStartup(podUID, containerID, star | ||||
| 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetContainerStartup", reflect.TypeOf((*MockManager)(nil).SetContainerStartup), podUID, containerID, started) | ||||
| } | ||||
|  | ||||
| // SetPodAllocation mocks base method. | ||||
| func (m *MockManager) SetPodAllocation(pod *v1.Pod) error { | ||||
| 	m.ctrl.T.Helper() | ||||
| 	ret := m.ctrl.Call(m, "SetPodAllocation", pod) | ||||
| 	ret0, _ := ret[0].(error) | ||||
| 	return ret0 | ||||
| } | ||||
|  | ||||
| // SetPodAllocation indicates an expected call of SetPodAllocation. | ||||
| func (mr *MockManagerMockRecorder) SetPodAllocation(pod interface{}) *gomock.Call { | ||||
| 	mr.mock.ctrl.T.Helper() | ||||
| 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetPodAllocation", reflect.TypeOf((*MockManager)(nil).SetPodAllocation), pod) | ||||
| } | ||||
|  | ||||
| // SetPodResizeStatus mocks base method. | ||||
| func (m *MockManager) SetPodResizeStatus(podUID types.UID, resize v1.PodResizeStatus) error { | ||||
| 	m.ctrl.T.Helper() | ||||
| 	ret := m.ctrl.Call(m, "SetPodResizeStatus", podUID, resize) | ||||
| 	ret0, _ := ret[0].(error) | ||||
| 	return ret0 | ||||
| } | ||||
|  | ||||
| // SetPodResizeStatus indicates an expected call of SetPodResizeStatus. | ||||
| func (mr *MockManagerMockRecorder) SetPodResizeStatus(podUID, resize interface{}) *gomock.Call { | ||||
| 	mr.mock.ctrl.T.Helper() | ||||
| 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetPodResizeStatus", reflect.TypeOf((*MockManager)(nil).SetPodResizeStatus), podUID, resize) | ||||
| } | ||||
|  | ||||
| // SetPodStatus mocks base method. | ||||
| func (m *MockManager) SetPodStatus(pod *v1.Pod, status v1.PodStatus) { | ||||
| 	m.ctrl.T.Helper() | ||||
| @@ -263,6 +292,20 @@ func (mr *MockManagerMockRecorder) Start() *gomock.Call { | ||||
| 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Start", reflect.TypeOf((*MockManager)(nil).Start)) | ||||
| } | ||||
|  | ||||
| // State mocks base method. | ||||
| func (m *MockManager) State() state.Reader { | ||||
| 	m.ctrl.T.Helper() | ||||
| 	ret := m.ctrl.Call(m, "State") | ||||
| 	ret0, _ := ret[0].(state.Reader) | ||||
| 	return ret0 | ||||
| } | ||||
|  | ||||
| // State indicates an expected call of State. | ||||
| func (mr *MockManagerMockRecorder) State() *gomock.Call { | ||||
| 	mr.mock.ctrl.T.Helper() | ||||
| 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "State", reflect.TypeOf((*MockManager)(nil).State)) | ||||
| } | ||||
|  | ||||
| // TerminatePod mocks base method. | ||||
| func (m *MockManager) TerminatePod(pod *v1.Pod) { | ||||
| 	m.ctrl.T.Helper() | ||||
|   | ||||
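A minimal sketch of using the regenerated mock with gomock, assuming the mock lives in the usual pkg/kubelet/status/testing package of this tree; the import alias and test name are hypothetical:

package podresize_test

import (
	"testing"

	"github.com/golang/mock/gomock"
	v1 "k8s.io/api/core/v1"
	statustest "k8s.io/kubernetes/pkg/kubelet/status/testing"
)

// Set an expectation on the newly mocked SetPodAllocation method and
// exercise it once.
func TestWithMockStatusManager(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	m := statustest.NewMockManager(ctrl)
	m.EXPECT().SetPodAllocation(gomock.Any()).Return(nil)

	if err := m.SetPodAllocation(&v1.Pod{}); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
}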
test/e2e/node/pod_resize.go (new file, 1447 lines; diff suppressed because it is too large)