Implement TopologyInfo and cpu_ids in podresources
It covers the device plugin and the CPU manager. One drawback remains: cpuset and the other structs involved (including cadvisor's) keep each CPU as an int, whereas a fixed-width integer is the better choice for a protobuf-based interface. This patch also introduces a separate CPUsProvider interface, although DevicesProvider could have been extended instead. The checkpoint change is not covered by a unit test.

Signed-off-by: Swati Sehgal <swsehgal@redhat.com>
Signed-off-by: Alexey Perevalov <alexey.perevalov@huawei.com>
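For orientation, here is a minimal sketch (not part of this commit) of how a node agent might consume the extended v1 API over the kubelet's pod-resources gRPC socket. The socket path is the kubelet default and is an assumption here; error handling is reduced to panics for brevity.

package main

import (
	"context"
	"fmt"
	"net"
	"time"

	"google.golang.org/grpc"
	podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
)

func main() {
	// Dial the kubelet's pod-resources socket (default path assumed).
	dialer := func(ctx context.Context, addr string) (net.Conn, error) {
		return (&net.Dialer{}).DialContext(ctx, "unix", addr)
	}
	conn, err := grpc.Dial("/var/lib/kubelet/pod-resources/kubelet.sock",
		grpc.WithInsecure(), grpc.WithContextDialer(dialer))
	if err != nil {
		panic(err)
	}
	defer conn.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	resp, err := podresourcesapi.NewPodResourcesListerClient(conn).
		List(ctx, &podresourcesapi.ListPodResourcesRequest{})
	if err != nil {
		panic(err)
	}
	for _, pod := range resp.GetPodResources() {
		for _, cnt := range pod.GetContainers() {
			// CpuIds is the field added by this commit.
			fmt.Printf("%s/%s cpus=%v\n", pod.GetName(), cnt.GetName(), cnt.GetCpuIds())
			for _, dev := range cnt.GetDevices() {
				// Topology is also new: device ids now carry NUMA node info.
				fmt.Printf("  %s %v numa=%v\n", dev.GetResourceName(), dev.GetDeviceIds(), dev.GetTopology().GetNodes())
			}
		}
	}
}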
@@ -28,14 +28,16 @@ import (
 type v1PodResourcesServer struct {
 	podsProvider    PodsProvider
 	devicesProvider DevicesProvider
+	cpusProvider    CPUsProvider
 }
 
 // NewV1PodResourcesServer returns a PodResourcesListerServer which lists pods provided by the PodsProvider
 // with device information provided by the DevicesProvider
-func NewV1PodResourcesServer(podsProvider PodsProvider, devicesProvider DevicesProvider) v1.PodResourcesListerServer {
+func NewV1PodResourcesServer(podsProvider PodsProvider, devicesProvider DevicesProvider, cpusProvider CPUsProvider) v1.PodResourcesListerServer {
 	return &v1PodResourcesServer{
 		podsProvider:    podsProvider,
 		devicesProvider: devicesProvider,
+		cpusProvider:    cpusProvider,
 	}
 }
 
@@ -58,6 +60,7 @@ func (p *v1PodResourcesServer) List(ctx context.Context, req *v1.ListPodResource
 			pRes.Containers[j] = &v1.ContainerResources{
 				Name:    container.Name,
 				Devices: p.devicesProvider.GetDevices(string(pod.UID), container.Name),
+				CpuIds:  p.cpusProvider.GetCPUs(string(pod.UID), container.Name),
 			}
 		}
 		podResources[i] = &pRes
@@ -31,24 +31,30 @@ func TestListPodResourcesV1(t *testing.T) {
 	podNamespace := "pod-namespace"
 	podUID := types.UID("pod-uid")
 	containerName := "container-name"
+	numaID := int64(1)
 
 	devs := []*podresourcesapi.ContainerDevices{
 		{
 			ResourceName: "resource",
 			DeviceIds:    []string{"dev0", "dev1"},
+			Topology:     &podresourcesapi.TopologyInfo{Nodes: []*podresourcesapi.NUMANode{{ID: numaID}}},
 		},
 	}
 
+	cpus := []int64{12, 23, 30}
+
 	for _, tc := range []struct {
 		desc             string
 		pods             []*v1.Pod
 		devices          []*podresourcesapi.ContainerDevices
+		cpus             []int64
 		expectedResponse *podresourcesapi.ListPodResourcesResponse
 	}{
 		{
 			desc:             "no pods",
 			pods:             []*v1.Pod{},
 			devices:          []*podresourcesapi.ContainerDevices{},
+			cpus:             []int64{},
 			expectedResponse: &podresourcesapi.ListPodResourcesResponse{},
 		},
 		{
@@ -70,6 +76,7 @@ func TestListPodResourcesV1(t *testing.T) {
 				},
 			},
 			devices: []*podresourcesapi.ContainerDevices{},
+			cpus:    []int64{},
 			expectedResponse: &podresourcesapi.ListPodResourcesResponse{
 				PodResources: []*podresourcesapi.PodResources{
 					{
@@ -104,6 +111,7 @@ func TestListPodResourcesV1(t *testing.T) {
 				},
 			},
 			devices: devs,
+			cpus:    cpus,
 			expectedResponse: &podresourcesapi.ListPodResourcesResponse{
 				PodResources: []*podresourcesapi.PodResources{
 					{
@@ -113,6 +121,7 @@ func TestListPodResourcesV1(t *testing.T) {
 						{
 							Name:    containerName,
 							Devices: devs,
+							CpuIds:  cpus,
 						},
 					},
 				},
@@ -124,8 +133,9 @@ func TestListPodResourcesV1(t *testing.T) {
 			m := new(mockProvider)
 			m.On("GetPods").Return(tc.pods)
 			m.On("GetDevices", string(podUID), containerName).Return(tc.devices)
+			m.On("GetCPUs", string(podUID), containerName).Return(tc.cpus)
 			m.On("UpdateAllocatedDevices").Return()
-			server := NewV1PodResourcesServer(m, m)
+			server := NewV1PodResourcesServer(m, m, m)
 			resp, err := server.List(context.TODO(), &podresourcesapi.ListPodResourcesRequest{})
 			if err != nil {
 				t.Errorf("want err = %v, got %q", nil, err)
@@ -43,6 +43,11 @@ func (m *mockProvider) GetDevices(podUID, containerName string) []*podresourcesv
 	return args.Get(0).([]*podresourcesv1.ContainerDevices)
 }
 
+func (m *mockProvider) GetCPUs(podUID, containerName string) []int64 {
+	args := m.Called(podUID, containerName)
+	return args.Get(0).([]int64)
+}
+
 func (m *mockProvider) UpdateAllocatedDevices() {
 	m.Called()
 }
@@ -31,3 +31,8 @@ type DevicesProvider interface {
 type PodsProvider interface {
 	GetPods() []*v1.Pod
 }
+
+// CPUsProvider knows how to provide the cpus used by the given container
+type CPUsProvider interface {
+	GetCPUs(podUID, containerName string) []int64
+}
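As a usage note, anything that maps a (podUID, containerName) pair to CPU ids satisfies this interface. A hypothetical fixed-map provider, such as the sketch below, could stand in for the CPU manager in tests; the type and its key scheme are illustrative, not from this commit.

// staticCPUsProvider is a hypothetical CPUsProvider test double.
type staticCPUsProvider struct {
	cpus map[string][]int64 // keyed by podUID + "/" + containerName
}

func (s *staticCPUsProvider) GetCPUs(podUID, containerName string) []int64 {
	return s.cpus[podUID+"/"+containerName]
}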
@@ -106,6 +106,9 @@ type ContainerManager interface {
 	// GetDevices returns information about the devices assigned to pods and containers
 	GetDevices(podUID, containerName string) []*podresourcesapi.ContainerDevices
 
+	// GetCPUs returns information about the cpus assigned to pods and containers
+	GetCPUs(podUID, containerName string) []int64
+
 	// ShouldResetExtendedResourceCapacity returns whether or not the extended resources should be zeroed,
 	// due to node recreation.
 	ShouldResetExtendedResourceCapacity() bool
@@ -1027,6 +1027,10 @@ func (cm *containerManagerImpl) GetDevices(podUID, containerName string) []*podr
 	return cm.deviceManager.GetDevices(podUID, containerName)
 }
 
+func (cm *containerManagerImpl) GetCPUs(podUID, containerName string) []int64 {
+	return cm.cpuManager.GetCPUs(podUID, containerName)
+}
+
 func (cm *containerManagerImpl) ShouldResetExtendedResourceCapacity() bool {
 	return cm.deviceManager.ShouldResetExtendedResourceCapacity()
 }
@@ -125,6 +125,10 @@ func (cm *containerManagerStub) UpdateAllocatedDevices() {
 	return
 }
 
+func (cm *containerManagerStub) GetCPUs(_, _ string) []int64 {
+	return nil
+}
+
 func NewStubContainerManager() ContainerManager {
 	return &containerManagerStub{shouldResetExtendedResourceCapacity: false}
 }
@@ -192,3 +192,7 @@ func (cm *containerManagerImpl) GetAllocateResourcesPodAdmitHandler() lifecycle.
 func (cm *containerManagerImpl) UpdateAllocatedDevices() {
 	return
 }
+
+func (cm *containerManagerImpl) GetCPUs(_, _ string) []int64 {
+	return nil
+}
@@ -77,6 +77,10 @@ type Manager interface {
 	// and is consulted to achieve NUMA aware resource alignment among this
 	// and other resource controllers.
 	GetTopologyHints(*v1.Pod, *v1.Container) map[string][]topologymanager.TopologyHint
+
+	// GetCPUs implements the podresources.CPUsProvider interface to provide allocated
+	// cpus for the container
+	GetCPUs(podUID, containerName string) []int64
 }
 
 type manager struct {
@@ -461,3 +465,12 @@ func (m *manager) updateContainerCPUSet(containerID string, cpus cpuset.CPUSet)
 		CpusetCpus: cpus.String(),
 	})
 }
+
+func (m *manager) GetCPUs(podUID, containerName string) []int64 {
+	cpus := m.state.GetCPUSetOrDefault(string(podUID), containerName)
+	result := []int64{}
+	for _, cpu := range cpus.ToSliceNoSort() {
+		result = append(result, int64(cpu))
+	}
+	return result
+}
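The loop above exists because cpuset stores CPU ids as machine-sized ints while the protobuf field is a fixed int64 — the drawback called out in the commit message. A standalone sketch of the same conversion:

package main

import "fmt"

// cpusToInt64 widens cpuset-style int CPU ids to the int64 the protobuf API uses.
func cpusToInt64(cpus []int) []int64 {
	result := make([]int64, 0, len(cpus))
	for _, cpu := range cpus {
		result = append(result, int64(cpu))
	}
	return result
}

func main() {
	fmt.Println(cpusToInt64([]int{12, 23, 30})) // [12 23 30]
}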
@@ -64,6 +64,11 @@ func (m *fakeManager) State() state.Reader {
 	return m.state
 }
 
+func (m *fakeManager) GetCPUs(podUID, containerName string) []int64 {
+	klog.Infof("[fake cpumanager] GetCPUs(podUID: %s, containerName: %s)", podUID, containerName)
+	return nil
+}
+
 // NewFakeManager creates empty/fake cpu manager
 func NewFakeManager() Manager {
 	return &fakeManager{
@@ -48,11 +48,13 @@ go_test(
     srcs = [
         "endpoint_test.go",
        "manager_test.go",
+        "pod_devices_test.go",
        "topology_hints_test.go",
     ],
     embed = [":go_default_library"],
     deps = [
        "//pkg/kubelet/checkpointmanager:go_default_library",
+        "//pkg/kubelet/cm/devicemanager/checkpoint:go_default_library",
        "//pkg/kubelet/cm/topologymanager:go_default_library",
        "//pkg/kubelet/cm/topologymanager/bitmask:go_default_library",
        "//pkg/kubelet/config:go_default_library",
@@ -8,6 +8,7 @@ go_library(
     deps = [
        "//pkg/kubelet/checkpointmanager:go_default_library",
        "//pkg/kubelet/checkpointmanager/checksum:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
     ],
 )
 
@@ -19,6 +19,7 @@ package checkpoint
 import (
 	"encoding/json"
 
+	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
 	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
 )
@@ -29,12 +30,15 @@ type DeviceManagerCheckpoint interface {
 	GetData() ([]PodDevicesEntry, map[string][]string)
 }
 
+// DevicesPerNUMA represents device ids obtained from device plugin per NUMA node id
+type DevicesPerNUMA map[int64][]string
+
 // PodDevicesEntry connects pod information to devices
 type PodDevicesEntry struct {
 	PodUID        string
 	ContainerName string
 	ResourceName  string
-	DeviceIDs     []string
+	DeviceIDs     DevicesPerNUMA
 	AllocResp     []byte
 }
 
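Changing DeviceIDs from []string to DevicesPerNUMA also changes how each entry serializes in the kubelet checkpoint file. The self-contained sketch below uses local copies of the types (not the real checkpoint package) to show the new on-disk shape — an object keyed by NUMA node id instead of a flat array; the commit message notes this checkpoint change has no unit test.

package main

import (
	"encoding/json"
	"fmt"
)

// Local stand-ins mirroring the checkpoint types after this commit.
type DevicesPerNUMA map[int64][]string

type PodDevicesEntry struct {
	PodUID        string
	ContainerName string
	ResourceName  string
	DeviceIDs     DevicesPerNUMA
	AllocResp     []byte
}

func main() {
	entry := PodDevicesEntry{
		PodUID:        "pod-uid",
		ContainerName: "container-name",
		ResourceName:  "domain1.com/resource1",
		DeviceIDs:     DevicesPerNUMA{0: {"dev1"}, 1: {"dev2"}},
	}
	b, _ := json.Marshal(entry)
	// DeviceIDs now encodes as {"0":["dev1"],"1":["dev2"]} rather than
	// the previous flat ["dev1","dev2"].
	fmt.Println(string(b))
}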
@@ -52,6 +56,22 @@ type Data struct {
 	Checksum checksum.Checksum
 }
 
+// NewDevicesPerNUMA is a function that creates DevicesPerNUMA map
+func NewDevicesPerNUMA() DevicesPerNUMA {
+	return make(DevicesPerNUMA)
+}
+
+// Devices is a function that returns all device ids for all NUMA nodes
+// and represent it as sets.String
+func (dev DevicesPerNUMA) Devices() sets.String {
+	result := sets.NewString()
+
+	for _, devs := range dev {
+		result.Insert(devs...)
+	}
+	return result
+}
+
 // New returns an instance of Checkpoint
 func New(devEntries []PodDevicesEntry,
 	devices map[string][]string) DeviceManagerCheckpoint {
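To make the semantics of Devices() concrete: device ids repeated across NUMA nodes are deduplicated by the set. A self-contained re-creation — the type is re-declared locally so the snippet compiles on its own, assuming the apimachinery module is available:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
)

// DevicesPerNUMA re-declared locally so the sketch runs on its own.
type DevicesPerNUMA map[int64][]string

// Devices flattens the per-NUMA map into a deduplicated set of device ids.
func (dev DevicesPerNUMA) Devices() sets.String {
	result := sets.NewString()
	for _, devs := range dev {
		result.Insert(devs...)
	}
	return result
}

func main() {
	devs := DevicesPerNUMA{0: {"dev0", "dev1"}, 1: {"dev1", "dev2"}}
	fmt.Println(devs.Devices().List()) // [dev0 dev1 dev2] — dev1 deduplicated
}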
@@ -900,8 +900,17 @@ func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Cont
 			return fmt.Errorf("no containers return in allocation response %v", resp)
 		}
 
+		allocDevicesWithNUMA := checkpoint.NewDevicesPerNUMA()
 		// Update internal cached podDevices state.
-		m.podDevices.insert(podUID, contName, resource, allocDevices, resp.ContainerResponses[0])
+		m.mutex.Lock()
+		for dev := range allocDevices {
+			for idx := range m.allDevices[resource][dev].Topology.Nodes {
+				node := m.allDevices[resource][dev].Topology.Nodes[idx]
+				allocDevicesWithNUMA[node.ID] = append(allocDevicesWithNUMA[node.ID], dev)
+			}
+		}
+		m.mutex.Unlock()
+		m.podDevices.insert(podUID, contName, resource, allocDevicesWithNUMA, resp.ContainerResponses[0])
 	}
 
 	if needsUpdateCheckpoint {
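The new insert path regroups a flat device allocation by NUMA node, using the topology each plugin reported at registration. A minimal sketch of that regrouping with local stand-in types (the real code reads m.allDevices under m.mutex):

package main

import "fmt"

// Minimal stand-ins for the device plugin API types used above.
type NUMANode struct{ ID int64 }
type TopologyInfo struct{ Nodes []*NUMANode }
type Device struct {
	ID       string
	Topology *TopologyInfo
}

func main() {
	// What the device manager knows about each registered device (assumed data).
	allDevices := map[string]Device{
		"dev1": {ID: "dev1", Topology: &TopologyInfo{Nodes: []*NUMANode{{ID: 0}}}},
		"dev2": {ID: "dev2", Topology: &TopologyInfo{Nodes: []*NUMANode{{ID: 1}}}},
	}
	allocDevices := []string{"dev1", "dev2"}

	// Regroup the flat allocation by NUMA node id, as
	// allocateContainerResources now does before inserting into podDevices.
	allocDevicesWithNUMA := map[int64][]string{}
	for _, dev := range allocDevices {
		for _, node := range allDevices[dev].Topology.Nodes {
			allocDevicesWithNUMA[node.ID] = append(allocDevicesWithNUMA[node.ID], dev)
		}
	}
	fmt.Println(allocDevicesWithNUMA) // map[0:[dev1] 1:[dev2]]
}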
@@ -38,6 +38,7 @@ import (
 	pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
 	watcherapi "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
 	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
+	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
 	"k8s.io/kubernetes/pkg/kubelet/config"
 	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
@@ -432,10 +433,10 @@ func TestUpdateCapacityAllocatable(t *testing.T) {
 	as.True(testManager.isDevicePluginResource(resourceName2))
 }
 
-func constructDevices(devices []string) sets.String {
-	ret := sets.NewString()
+func constructDevices(devices []string) checkpoint.DevicesPerNUMA {
+	ret := checkpoint.DevicesPerNUMA{}
 	for _, dev := range devices {
-		ret.Insert(dev)
+		ret[0] = append(ret[0], dev)
 	}
 	return ret
 }
@@ -621,13 +622,11 @@ func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestReso
 		activePods:        activePods,
 		sourcesReady:      &sourcesReadyStub{},
 		checkpointManager: ckm,
+		allDevices:        make(map[string]map[string]pluginapi.Device),
 	}
 
 	for _, res := range testRes {
-		testManager.healthyDevices[res.resourceName] = sets.NewString()
-		for _, dev := range res.devs {
-			testManager.healthyDevices[res.resourceName].Insert(dev)
-		}
+		testManager.healthyDevices[res.resourceName] = sets.NewString(res.devs.Devices().UnsortedList()...)
 		if res.resourceName == "domain1.com/resource1" {
 			testManager.endpoints[res.resourceName] = endpointInfo{
 				e: &MockEndpoint{allocateFunc: allocateStubFunc()},
@@ -657,6 +656,8 @@ func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestReso
 				opts: nil,
 			}
 		}
+		testManager.allDevices[res.resourceName] = makeDevice(res.devs)
+
 	}
 	return testManager, nil
 }
@@ -664,19 +665,19 @@ func getTestManager(tmpDir string, activePods ActivePodsFunc, testRes []TestReso
 type TestResource struct {
 	resourceName     string
 	resourceQuantity resource.Quantity
-	devs             []string
+	devs             checkpoint.DevicesPerNUMA
 }
 
 func TestPodContainerDeviceAllocation(t *testing.T) {
 	res1 := TestResource{
 		resourceName:     "domain1.com/resource1",
 		resourceQuantity: *resource.NewQuantity(int64(2), resource.DecimalSI),
-		devs:             []string{"dev1", "dev2"},
+		devs:             checkpoint.DevicesPerNUMA{0: []string{"dev1", "dev2"}},
 	}
 	res2 := TestResource{
 		resourceName:     "domain2.com/resource2",
 		resourceQuantity: *resource.NewQuantity(int64(1), resource.DecimalSI),
-		devs:             []string{"dev3", "dev4"},
+		devs:             checkpoint.DevicesPerNUMA{0: []string{"dev3", "dev4"}},
 	}
 	testResources := make([]TestResource, 2)
 	testResources = append(testResources, res1)
@@ -767,12 +768,12 @@ func TestInitContainerDeviceAllocation(t *testing.T) {
 	res1 := TestResource{
 		resourceName:     "domain1.com/resource1",
 		resourceQuantity: *resource.NewQuantity(int64(2), resource.DecimalSI),
-		devs:             []string{"dev1", "dev2"},
+		devs:             checkpoint.DevicesPerNUMA{0: []string{"dev1", "dev2"}},
 	}
 	res2 := TestResource{
 		resourceName:     "domain2.com/resource2",
 		resourceQuantity: *resource.NewQuantity(int64(1), resource.DecimalSI),
-		devs:             []string{"dev3", "dev4"},
+		devs:             checkpoint.DevicesPerNUMA{0: []string{"dev3", "dev4"}},
 	}
 	testResources := make([]TestResource, 2)
 	testResources = append(testResources, res1)
@@ -920,7 +921,7 @@ func TestDevicePreStartContainer(t *testing.T) {
 	res1 := TestResource{
 		resourceName:     "domain1.com/resource1",
 		resourceQuantity: *resource.NewQuantity(int64(2), resource.DecimalSI),
-		devs:             []string{"dev1", "dev2"},
+		devs:             checkpoint.DevicesPerNUMA{0: []string{"dev1", "dev2"}},
 	}
 	as := require.New(t)
 	podsStub := activePodsStub{
@@ -960,7 +961,7 @@ func TestDevicePreStartContainer(t *testing.T) {
 
 	as.Contains(initializedDevs, "dev1")
 	as.Contains(initializedDevs, "dev2")
-	as.Equal(len(initializedDevs), len(res1.devs))
+	as.Equal(len(initializedDevs), res1.devs.Devices().Len())
 
 	expectedResps, err := allocateStubFunc()([]string{"dev1", "dev2"})
 	as.Nil(err)
@@ -1057,3 +1058,13 @@ func allocateStubFunc() func(devs []string) (*pluginapi.AllocateResponse, error)
 		return resps, nil
 	}
 }
+
+func makeDevice(devOnNUMA checkpoint.DevicesPerNUMA) map[string]pluginapi.Device {
+	res := make(map[string]pluginapi.Device)
+	for node, devs := range devOnNUMA {
+		for idx := range devs {
+			res[devs[idx]] = pluginapi.Device{ID: devs[idx], Topology: &pluginapi.TopologyInfo{Nodes: []*pluginapi.NUMANode{{ID: node}}}}
+		}
+	}
+	return res
+}
@@ -30,7 +30,7 @@ import (
 
 type deviceAllocateInfo struct {
 	// deviceIds contains device Ids allocated to this container for the given resourceName.
-	deviceIds sets.String
+	deviceIds checkpoint.DevicesPerNUMA
 	// allocResp contains cached rpc AllocateResponse.
 	allocResp *pluginapi.ContainerAllocateResponse
 }
@@ -70,7 +70,7 @@ func (pdev *podDevices) hasPod(podUID string) bool {
 	return podExists
 }
 
-func (pdev *podDevices) insert(podUID, contName, resource string, devices sets.String, resp *pluginapi.ContainerAllocateResponse) {
+func (pdev *podDevices) insert(podUID, contName, resource string, devices checkpoint.DevicesPerNUMA, resp *pluginapi.ContainerAllocateResponse) {
 	pdev.Lock()
 	defer pdev.Unlock()
 	if _, podExists := pdev.devs[podUID]; !podExists {
@@ -108,7 +108,7 @@ func (pdev *podDevices) containerDevices(podUID, contName, resource string) sets
 	if !resourceExists {
 		return nil
 	}
-	return devs.deviceIds
+	return devs.deviceIds.Devices()
 }
 
 // Populates allocatedResources with the device resources allocated to the specified <podUID, contName>.
@@ -124,7 +124,7 @@ func (pdev *podDevices) addContainerAllocatedResources(podUID, contName string,
 		return
 	}
 	for resource, devices := range resources {
-		allocatedResources[resource] = allocatedResources[resource].Union(devices.deviceIds)
+		allocatedResources[resource] = allocatedResources[resource].Union(devices.deviceIds.Devices())
 	}
 }
 
@@ -141,7 +141,7 @@ func (pdev *podDevices) removeContainerAllocatedResources(podUID, contName strin
 		return
 	}
 	for resource, devices := range resources {
-		allocatedResources[resource] = allocatedResources[resource].Difference(devices.deviceIds)
+		allocatedResources[resource] = allocatedResources[resource].Difference(devices.deviceIds.Devices())
 	}
 }
 
@@ -157,7 +157,7 @@ func (pdev *podDevices) devices() map[string]sets.String {
 			ret[resource] = sets.NewString()
 		}
 		if devices.allocResp != nil {
-			ret[resource] = ret[resource].Union(devices.deviceIds)
+			ret[resource] = ret[resource].Union(devices.deviceIds.Devices())
 		}
 	}
 }
@@ -173,7 +173,6 @@ func (pdev *podDevices) toCheckpointData() []checkpoint.PodDevicesEntry {
 	for podUID, containerDevices := range pdev.devs {
 		for conName, resources := range containerDevices {
 			for resource, devices := range resources {
-				devIds := devices.deviceIds.UnsortedList()
 				if devices.allocResp == nil {
 					klog.Errorf("Can't marshal allocResp for %v %v %v: allocation response is missing", podUID, conName, resource)
 					continue
@@ -188,7 +187,7 @@
 				PodUID:        podUID,
 				ContainerName: conName,
 				ResourceName:  resource,
-				DeviceIDs:     devIds,
+				DeviceIDs:     devices.deviceIds,
 				AllocResp:     allocResp})
 		}
 	}
@@ -201,17 +200,13 @@ func (pdev *podDevices) fromCheckpointData(data []checkpoint.PodDevicesEntry) {
 	for _, entry := range data {
 		klog.V(2).Infof("Get checkpoint entry: %v %v %v %v %v\n",
 			entry.PodUID, entry.ContainerName, entry.ResourceName, entry.DeviceIDs, entry.AllocResp)
-		devIDs := sets.NewString()
-		for _, devID := range entry.DeviceIDs {
-			devIDs.Insert(devID)
-		}
 		allocResp := &pluginapi.ContainerAllocateResponse{}
 		err := allocResp.Unmarshal(entry.AllocResp)
 		if err != nil {
 			klog.Errorf("Can't unmarshal allocResp for %v %v %v: %v", entry.PodUID, entry.ContainerName, entry.ResourceName, err)
 			continue
 		}
-		pdev.insert(entry.PodUID, entry.ContainerName, entry.ResourceName, devIDs, allocResp)
+		pdev.insert(entry.PodUID, entry.ContainerName, entry.ResourceName, entry.DeviceIDs, allocResp)
 	}
 }
 
@@ -328,10 +323,13 @@ func (pdev *podDevices) getContainerDevices(podUID, contName string) []*podresou
 	}
 	cDev := []*podresourcesapi.ContainerDevices{}
 	for resource, allocateInfo := range pdev.devs[podUID][contName] {
-		cDev = append(cDev, &podresourcesapi.ContainerDevices{
-			ResourceName: resource,
-			DeviceIds:    allocateInfo.deviceIds.UnsortedList(),
-		})
+		for numaid, devlist := range allocateInfo.deviceIds {
+			cDev = append(cDev, &podresourcesapi.ContainerDevices{
+				ResourceName: resource,
+				DeviceIds:    devlist,
+				Topology:     &podresourcesapi.TopologyInfo{Nodes: []*podresourcesapi.NUMANode{{ID: numaid}}},
+			})
+		}
 	}
 	return cDev
 }
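With per-NUMA bookkeeping in place, getContainerDevices now fans a single resource out into one ContainerDevices entry per NUMA node. A self-contained sketch of that fan-out, using local stand-ins whose field names mirror the generated podresources v1 types:

package main

import "fmt"

// Local stand-ins for the podresources v1 API shapes.
type NUMANode struct{ ID int64 }
type TopologyInfo struct{ Nodes []*NUMANode }
type ContainerDevices struct {
	ResourceName string
	DeviceIds    []string
	Topology     *TopologyInfo
}

func main() {
	// One resource whose devices sit on two NUMA nodes...
	perNUMA := map[int64][]string{0: {"dev1"}, 1: {"dev2", "dev3"}}

	// ...fans out into one ContainerDevices entry per NUMA node,
	// matching the loop added to getContainerDevices above.
	var cDev []*ContainerDevices
	for numaid, devlist := range perNUMA {
		cDev = append(cDev, &ContainerDevices{
			ResourceName: "domain1.com/resource1",
			DeviceIds:    devlist,
			Topology:     &TopologyInfo{Nodes: []*NUMANode{{ID: numaid}}},
		})
	}
	for _, cd := range cDev {
		fmt.Printf("%s %v numa=%d\n", cd.ResourceName, cd.DeviceIds, cd.Topology.Nodes[0].ID)
	}
}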
pkg/kubelet/cm/devicemanager/pod_devices_test.go (new file, 47 lines)
@@ -0,0 +1,47 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package devicemanager
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
+)
+
+func TestGetContainerDevices(t *testing.T) {
+	podDevices := newPodDevices()
+	resourceName1 := "domain1.com/resource1"
+	podID := "pod1"
+	contID := "con1"
+	devices := checkpoint.DevicesPerNUMA{0: []string{"dev1"}, 1: []string{"dev1"}}
+
+	podDevices.insert(podID, contID, resourceName1,
+		devices,
+		constructAllocResp(map[string]string{"/dev/r1dev1": "/dev/r1dev1", "/dev/r1dev2": "/dev/r1dev2"}, map[string]string{"/home/r1lib1": "/usr/r1lib1"}, map[string]string{}))
+
+	contDevices := podDevices.getContainerDevices(podID, contID)
+	require.Equal(t, len(devices), len(contDevices), "Incorrect container devices")
+	for _, contDev := range contDevices {
+		for _, node := range contDev.Topology.Nodes {
+			dev, ok := devices[node.ID]
+			require.True(t, ok, "NUMA id %v doesn't exist in result", node.ID)
+			require.Equal(t, contDev.DeviceIds[0], dev[0], "Can't find device %s in result", dev[0])
+		}
+	}
+}
@@ -404,7 +404,7 @@ func TestGetTopologyHints(t *testing.T) {
 	for p := range tc.allocatedDevices {
 		for c := range tc.allocatedDevices[p] {
 			for r, devices := range tc.allocatedDevices[p][c] {
-				m.podDevices.insert(p, c, r, sets.NewString(devices...), nil)
+				m.podDevices.insert(p, c, r, constructDevices(devices), nil)
 
 				m.allocatedDevices[r] = sets.NewString()
 				for _, d := range devices {
@@ -2193,7 +2193,7 @@ func (kl *Kubelet) ListenAndServePodResources() {
 		klog.V(2).Infof("Failed to get local endpoint for PodResources endpoint: %v", err)
 		return
 	}
-	server.ListenAndServePodResources(socket, kl.podManager, kl.containerManager)
+	server.ListenAndServePodResources(socket, kl.podManager, kl.containerManager, kl.containerManager)
 }
 
 // Delete the eligible dead container instances in a pod. Depending on the configuration, the latest dead containers may be kept around.
@@ -179,10 +179,10 @@ func ListenAndServeKubeletReadOnlyServer(host HostInterface, resourceAnalyzer st
 }
 
 // ListenAndServePodResources initializes a gRPC server to serve the PodResources service
-func ListenAndServePodResources(socket string, podsProvider podresources.PodsProvider, devicesProvider podresources.DevicesProvider) {
+func ListenAndServePodResources(socket string, podsProvider podresources.PodsProvider, devicesProvider podresources.DevicesProvider, cpusProvider podresources.CPUsProvider) {
 	server := grpc.NewServer()
 	podresourcesapiv1alpha1.RegisterPodResourcesListerServer(server, podresources.NewV1alpha1PodResourcesServer(podsProvider, devicesProvider))
-	podresourcesapi.RegisterPodResourcesListerServer(server, podresources.NewV1PodResourcesServer(podsProvider, devicesProvider))
+	podresourcesapi.RegisterPodResourcesListerServer(server, podresources.NewV1PodResourcesServer(podsProvider, devicesProvider, cpusProvider))
 	l, err := util.CreateListener(socket)
 	if err != nil {
 		klog.Fatalf("Failed to create listener for podResources endpoint: %v", err)