mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-04 04:08:16 +00:00 
			
		
		
		
	Automatic merge from submit-queue (batch tested with PRs 39684, 39577, 38989, 39534, 39702) Set PodStatus QOSClass field This PR continues the work for https://github.com/kubernetes/kubernetes/pull/37968 It converts all local usage of the `qos` package class types to the new API level types (first commit) and sets the pod status QOSClass field at pod creation time on the API server in `PrepareForCreate` and in the kubelet in the pod status update path (second commit). This way the pod QOS class is set even if the pod isn't scheduled yet. Fixes #33255 @ConnorDoyle @derekwaynecarr @vishh
		
			
				
	
	
		
			80 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			80 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
/*
 | 
						|
Copyright 2015 The Kubernetes Authors.
 | 
						|
 | 
						|
Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
you may not use this file except in compliance with the License.
 | 
						|
You may obtain a copy of the License at
 | 
						|
 | 
						|
    http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
 | 
						|
Unless required by applicable law or agreed to in writing, software
 | 
						|
distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
See the License for the specific language governing permissions and
 | 
						|
limitations under the License.
 | 
						|
*/
 | 
						|
 | 
						|
package qos
 | 
						|
 | 
						|
import (
 | 
						|
	"k8s.io/kubernetes/pkg/api/v1"
 | 
						|
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
 | 
						|
)
 | 
						|
 | 
						|
const (
	// PodInfraOOMAdj is very docker specific. For arbitrary runtime, it may not make
	// sense to set sandbox level oom score, e.g. a sandbox could only be a namespace
	// without a process.
	// TODO: Handle infra container oom score adj in a runtime agnostic way.
	PodInfraOOMAdj int = -998
	// CriticalPodOOMAdj is the OOM score adjustment GetContainerOOMScoreAdjust returns
	// for containers of critical pods.
	// This is the workaround for https://github.com/kubernetes/kubernetes/issues/38322.
	// TODO: Should handle critical pod oom score adj with a proper preemption priority.
	CriticalPodOOMAdj int = -998
	// KubeletOOMScoreAdj, DockerOOMScoreAdj and KubeProxyOOMScoreAdj are not used in
	// this file; presumably they are the OOM score adjustments applied to the kubelet,
	// docker daemon and kube-proxy processes respectively (confirm at the call sites).
	KubeletOOMScoreAdj   int = -999
	DockerOOMScoreAdj    int = -999
	KubeProxyOOMScoreAdj int = -999
	// guaranteedOOMScoreAdj is the OOM score adjustment for containers of Guaranteed
	// QoS pods, which should be the last to get killed.
	guaranteedOOMScoreAdj int = -998
	// besteffortOOMScoreAdj is the OOM score adjustment for containers of BestEffort
	// QoS pods.
	besteffortOOMScoreAdj int = 1000
)
 | 
						|
 | 
						|
// GetContainerOOMAdjust returns the amount by which the OOM score of all processes in the
 | 
						|
// container should be adjusted.
 | 
						|
// The OOM score of a process is the percentage of memory it consumes
 | 
						|
// multiplied by 10 (barring exceptional cases) + a configurable quantity which is between -1000
 | 
						|
// and 1000. Containers with higher OOM scores are killed if the system runs out of memory.
 | 
						|
// See https://lwn.net/Articles/391222/ for more information.
 | 
						|
func GetContainerOOMScoreAdjust(pod *v1.Pod, container *v1.Container, memoryCapacity int64) int {
 | 
						|
	if kubetypes.IsCriticalPod(pod) {
 | 
						|
		return CriticalPodOOMAdj
 | 
						|
	}
 | 
						|
 | 
						|
	switch GetPodQOS(pod) {
 | 
						|
	case v1.PodQOSGuaranteed:
 | 
						|
		// Guaranteed containers should be the last to get killed.
 | 
						|
		return guaranteedOOMScoreAdj
 | 
						|
	case v1.PodQOSBestEffort:
 | 
						|
		return besteffortOOMScoreAdj
 | 
						|
	}
 | 
						|
 | 
						|
	// Burstable containers are a middle tier, between Guaranteed and Best-Effort. Ideally,
 | 
						|
	// we want to protect Burstable containers that consume less memory than requested.
 | 
						|
	// The formula below is a heuristic. A container requesting for 10% of a system's
 | 
						|
	// memory will have an OOM score adjust of 900. If a process in container Y
 | 
						|
	// uses over 10% of memory, its OOM score will be 1000. The idea is that containers
 | 
						|
	// which use more than their request will have an OOM score of 1000 and will be prime
 | 
						|
	// targets for OOM kills.
 | 
						|
	// Note that this is a heuristic, it won't work if a container has many small processes.
 | 
						|
	memoryRequest := container.Resources.Requests.Memory().Value()
 | 
						|
	oomScoreAdjust := 1000 - (1000*memoryRequest)/memoryCapacity
 | 
						|
	// A guaranteed pod using 100% of memory can have an OOM score of 10. Ensure
 | 
						|
	// that burstable pods have a higher OOM score adjustment.
 | 
						|
	if int(oomScoreAdjust) < (1000 + guaranteedOOMScoreAdj) {
 | 
						|
		return (1000 + guaranteedOOMScoreAdj)
 | 
						|
	}
 | 
						|
	// Give burstable pods a higher chance of survival over besteffort pods.
 | 
						|
	if int(oomScoreAdjust) == besteffortOOMScoreAdj {
 | 
						|
		return int(oomScoreAdjust - 1)
 | 
						|
	}
 | 
						|
	return int(oomScoreAdjust)
 | 
						|
}
 |