In-place Pod Vertical Scaling - core implementation

1. Core Kubelet changes to implement In-place Pod Vertical Scaling.
2. E2E tests for In-place Pod Vertical Scaling.
3. Refactor kubelet code and add missing tests (Derek's kubelet review).
4. Add a new hash over container fields, excluding the Resources field, so the feature gate can be toggled without restarting containers that do not use the feature (a rough sketch follows the commit metadata below).
5. Fix a corner case where a resize A->B->A gets ignored.
6. Add cgroup v2 support to the pod resize E2E test.
KEP: /enhancements/keps/sig-node/1287-in-place-update-pod-resources

Co-authored-by: Chen Wang <Chen.Wang1@ibm.com>
Author:     Vinay Kulkarni
AuthorDate: 2022-11-04 13:47:33 -07:00
Committer:  vinay kulkarni
Parent:     231849a908
Commit:     f2bd94a0de
48 changed files with 4639 additions and 56 deletions
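The hash change in item 4 above is about keeping container hashes stable when the feature gate is flipped: if the Resources field is excluded from the hash, containers that never use resize are not restarted on toggle. A minimal, hypothetical sketch of that idea (helper name and hashing details are illustrative, not the code from this commit):

package sketch

import (
	"fmt"
	"hash/fnv"

	v1 "k8s.io/api/core/v1"
)

// hashContainerWithoutResources hashes a copy of the container whose Resources
// field has been cleared, so the result does not change when only Resources
// handling (e.g. the resize feature gate) changes.
func hashContainerWithoutResources(container *v1.Container) uint64 {
	c := container.DeepCopy()
	c.Resources = v1.ResourceRequirements{} // exclude Resources from the hash
	hasher := fnv.New64a()
	fmt.Fprintf(hasher, "%#v", *c) // illustrative; the real kubelet uses its own deep-hash helper
	return hasher.Sum64()
}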


@@ -60,7 +60,7 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C
 		return nil, err
 	}
 	lc := &runtimeapi.LinuxContainerConfig{
-		Resources:       &runtimeapi.LinuxContainerResources{},
+		Resources:       m.generateLinuxContainerResources(pod, container, enforceMemoryQoS),
 		SecurityContext: sc,
 	}
@@ -69,17 +69,22 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C
 		lc.SecurityContext.NamespaceOptions.TargetId = nsTarget.ID
 	}
+	return lc, nil
+}
+
+// generateLinuxContainerResources generates linux container resources config for runtime
+func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod, container *v1.Container, enforceMemoryQoS bool) *runtimeapi.LinuxContainerResources {
 	// set linux container resources
 	var cpuRequest *resource.Quantity
 	if _, cpuRequestExists := container.Resources.Requests[v1.ResourceCPU]; cpuRequestExists {
 		cpuRequest = container.Resources.Requests.Cpu()
 	}
-	lc.Resources = m.calculateLinuxResources(cpuRequest, container.Resources.Limits.Cpu(), container.Resources.Limits.Memory())
+	lcr := m.calculateLinuxResources(cpuRequest, container.Resources.Limits.Cpu(), container.Resources.Limits.Memory())
-	lc.Resources.OomScoreAdj = int64(qos.GetContainerOOMScoreAdjust(pod, container,
+	lcr.OomScoreAdj = int64(qos.GetContainerOOMScoreAdjust(pod, container,
 		int64(m.machineInfo.MemoryCapacity)))
-	lc.Resources.HugepageLimits = GetHugepageLimitsFromResources(container.Resources)
+	lcr.HugepageLimits = GetHugepageLimitsFromResources(container.Resources)
 	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) {
 		// NOTE(ehashman): Behaviour is defined in the opencontainers runtime spec:
@@ -87,14 +92,14 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C
 		switch m.memorySwapBehavior {
 		case kubelettypes.UnlimitedSwap:
 			// -1 = unlimited swap
-			lc.Resources.MemorySwapLimitInBytes = -1
+			lcr.MemorySwapLimitInBytes = -1
 		case kubelettypes.LimitedSwap:
 			fallthrough
 		default:
 			// memorySwapLimit = total permitted memory+swap; if equal to memory limit, => 0 swap above memory limit
 			// Some swapping is still possible.
 			// Note that if memory limit is 0, memory swap limit is ignored.
-			lc.Resources.MemorySwapLimitInBytes = lc.Resources.MemoryLimitInBytes
+			lcr.MemorySwapLimitInBytes = lcr.MemoryLimitInBytes
 		}
 	}
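For illustration only (values not taken from this commit): with LimitedSwap and MemoryLimitInBytes = 2147483648 (2Gi), MemorySwapLimitInBytes is also set to 2147483648, so the container gets no swap allowance beyond its memory limit; with UnlimitedSwap it is set to -1; and if the memory limit is 0, the swap limit is ignored by the runtime.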
@@ -125,18 +130,31 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C
 			unified[cm.MemoryHigh] = strconv.FormatInt(memoryHigh, 10)
 		}
 		if len(unified) > 0 {
-			if lc.Resources.Unified == nil {
-				lc.Resources.Unified = unified
+			if lcr.Unified == nil {
+				lcr.Unified = unified
 			} else {
 				for k, v := range unified {
-					lc.Resources.Unified[k] = v
+					lcr.Unified[k] = v
 				}
 			}
 			klog.V(4).InfoS("MemoryQoS config for container", "pod", klog.KObj(pod), "containerName", container.Name, "unified", unified)
 		}
 	}
-	return lc, nil
+	return lcr
 }
+
+// generateContainerResources generates platform specific (linux) container resources config for runtime
+func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, container *v1.Container) *runtimeapi.ContainerResources {
+	enforceMemoryQoS := false
+	// Set memory.min and memory.high if MemoryQoS enabled with cgroups v2
+	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryQoS) &&
+		libcontainercgroups.IsCgroup2UnifiedMode() {
+		enforceMemoryQoS = true
+	}
+	return &runtimeapi.ContainerResources{
+		Linux: m.generateLinuxContainerResources(pod, container, enforceMemoryQoS),
+	}
+}
 // calculateLinuxResources will create the linuxContainerResources type based on the provided CPU and memory resource requests, limits
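calculateLinuxResources itself is unchanged by this hunk, but for context the usual kubelet conversions map the CPU request to cgroup shares and the CPU limit to a CFS quota over a period. A rough sketch of that arithmetic with the standard constants (ignoring rounding and minimum-share handling; not the code from this commit):

package sketch

const (
	milliCPUToCPU    = 1000   // milliCPU units per CPU
	sharesPerCPU     = 1024   // cgroup cpu.shares per CPU
	quotaPeriodUsecs = 100000 // default 100ms CFS period
)

// cpuRequestToShares: e.g. a 500m request -> 500*1024/1000 = 512 shares.
func cpuRequestToShares(milliCPU int64) int64 {
	return milliCPU * sharesPerCPU / milliCPUToCPU
}

// cpuLimitToQuota: e.g. a 2-CPU (2000m) limit -> 200000us of quota per 100000us period.
func cpuLimitToQuota(milliCPU int64) (quota, period int64) {
	return milliCPU * quotaPeriodUsecs / milliCPUToCPU, quotaPeriodUsecs
}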
@@ -218,3 +236,34 @@ func GetHugepageLimitsFromResources(resources v1.ResourceRequirements) []*runtim
 	return hugepageLimits
 }
+
+func toKubeContainerResources(statusResources *runtimeapi.ContainerResources) *kubecontainer.ContainerResources {
+	var cStatusResources *kubecontainer.ContainerResources
+	runtimeStatusResources := statusResources.GetLinux()
+	if runtimeStatusResources != nil {
+		var cpuLimit, memLimit, cpuRequest *resource.Quantity
+		if runtimeStatusResources.CpuPeriod > 0 {
+			milliCPU := quotaToMilliCPU(runtimeStatusResources.CpuQuota, runtimeStatusResources.CpuPeriod)
+			if milliCPU > 0 {
+				cpuLimit = resource.NewMilliQuantity(milliCPU, resource.DecimalSI)
+			}
+		}
+		if runtimeStatusResources.CpuShares > 0 {
+			milliCPU := sharesToMilliCPU(runtimeStatusResources.CpuShares)
+			if milliCPU > 0 {
+				cpuRequest = resource.NewMilliQuantity(milliCPU, resource.DecimalSI)
+			}
+		}
+		if runtimeStatusResources.MemoryLimitInBytes > 0 {
+			memLimit = resource.NewQuantity(runtimeStatusResources.MemoryLimitInBytes, resource.BinarySI)
+		}
+		if cpuLimit != nil || memLimit != nil || cpuRequest != nil {
+			cStatusResources = &kubecontainer.ContainerResources{
+				CPULimit:    cpuLimit,
+				CPURequest:  cpuRequest,
+				MemoryLimit: memLimit,
+			}
+		}
+	}
+	return cStatusResources
+}
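The quotaToMilliCPU and sharesToMilliCPU helpers used by toKubeContainerResources are not part of this hunk; they invert the request/limit conversions sketched earlier so that resources reported by the runtime can be compared against the pod spec. A plausible sketch (ignoring rounding and minimum-share handling; not necessarily the committed code):

package sketch

// quotaToMilliCPU converts a CFS quota/period pair back to milliCPU,
// e.g. quota 200000us over a 100000us period -> 2000m.
func quotaToMilliCPU(quota, period int64) int64 {
	if quota <= 0 || period <= 0 {
		return 0
	}
	return quota * 1000 / period
}

// sharesToMilliCPU converts cgroup cpu.shares back to milliCPU,
// e.g. 512 shares -> 500m.
func sharesToMilliCPU(shares int64) int64 {
	if shares <= 0 {
		return 0
	}
	return shares * 1000 / 1024
}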