DRA scheduler: adapt to v1alpha3 API
The structured parameter allocation logic was written from scratch in staging/src/k8s.io/dynamic-resource-allocation/structured, where it might be useful for out-of-tree components. Besides the new features (amount, admin access) and the new API, it now supports backtracking when the initial device selection doesn't lead to a complete allocation of all claims.

Co-authored-by: Ed Bartosh <eduard.bartosh@intel.com>
Co-authored-by: John Belamaric <jbelamaric@google.com>
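The backtracking mentioned above can be illustrated with a minimal, self-contained sketch. The types and names below (device, request, allocate) are invented for illustration only and are not the API of the structured package: if a device chosen for an earlier request makes a later request unsatisfiable, the allocator undoes that choice and tries the next candidate instead of failing outright.

// Illustrative sketch of backtracking allocation; all types here are assumptions,
// not the real k8s.io/dynamic-resource-allocation/structured implementation.
package main

import "fmt"

type device struct {
	name string
	free bool
}

type request struct {
	name string
	// suits reports whether a device can satisfy this request.
	suits func(device) bool
}

// allocate assigns one device per request, backtracking when a
// partial assignment cannot be extended to cover all requests.
func allocate(requests []request, devices []device, picked map[string]string) bool {
	if len(requests) == 0 {
		return true // every request satisfied
	}
	req := requests[0]
	for i := range devices {
		if !devices[i].free || !req.suits(devices[i]) {
			continue
		}
		devices[i].free = false
		picked[req.name] = devices[i].name
		if allocate(requests[1:], devices, picked) {
			return true
		}
		// Backtrack: undo this choice and try the next device.
		devices[i].free = true
		delete(picked, req.name)
	}
	return false
}

func main() {
	devices := []device{{name: "instance-1", free: true}, {name: "instance-2", free: true}}
	requests := []request{
		{name: "req-1", suits: func(d device) bool { return true }},
		{name: "req-2", suits: func(d device) bool { return d.name == "instance-1" }},
	}
	picked := map[string]string{}
	if allocate(requests, devices, picked) {
		// req-2 only fits instance-1, which forces req-1 to be reassigned to instance-2.
		fmt.Println(picked)
	}
}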
@@ -51,6 +51,11 @@ import (
var (
podKind = v1.SchemeGroupVersion.WithKind("Pod")
nodeName = "worker"
node2Name = "worker-2"
node3Name = "worker-3"
controller = "some-driver"
driver = controller
podName = "my-pod"
podUID = "1234"
resourceName = "my-resource"
@@ -59,45 +64,12 @@ var (
claimName2 = podName + "-" + resourceName + "-2"
className = "my-resource-class"
namespace = "default"
attrName = resourceapi.QualifiedName("healthy") // device attribute only available on non-default node
resourceClass = &resourceapi.ResourceClass{
deviceClass = &resourceapi.DeviceClass{
ObjectMeta: metav1.ObjectMeta{
Name: className,
},
DriverName: "some-driver",
}
structuredResourceClass = &resourceapi.ResourceClass{
ObjectMeta: metav1.ObjectMeta{
Name: className,
},
DriverName: "some-driver",
StructuredParameters: ptr.To(true),
}
structuredResourceClassWithParams = &resourceapi.ResourceClass{
ObjectMeta: metav1.ObjectMeta{
Name: className,
},
DriverName: "some-driver",
StructuredParameters: ptr.To(true),
ParametersRef: &resourceapi.ResourceClassParametersReference{
Name: className,
Namespace: namespace,
Kind: "ResourceClassParameters",
APIGroup: "resource.k8s.io",
},
}
structuredResourceClassWithCRD = &resourceapi.ResourceClass{
ObjectMeta: metav1.ObjectMeta{
Name: className,
},
DriverName: "some-driver",
StructuredParameters: ptr.To(true),
ParametersRef: &resourceapi.ResourceClassParametersReference{
Name: className,
Namespace: namespace,
Kind: "ResourceClassParameters",
APIGroup: "example.com",
},
}
podWithClaimName = st.MakePod().Name(podName).Namespace(namespace).
@@ -124,39 +96,29 @@ var (
|
||||
PodResourceClaims(v1.PodResourceClaim{Name: resourceName2, ResourceClaimName: &claimName2}).
|
||||
Obj()
|
||||
|
||||
workerNode = &st.MakeNode().Name("worker").Label("kubernetes.io/hostname", "worker").Node
|
||||
workerNodeSlice = st.MakeResourceSlice("worker", "some-driver").NamedResourcesInstances("instance-1").Obj()
|
||||
// Node with "instance-1" device and no device attributes.
|
||||
workerNode = &st.MakeNode().Name(nodeName).Label("kubernetes.io/hostname", nodeName).Node
|
||||
workerNodeSlice = st.MakeResourceSlice(nodeName, driver).Device("instance-1", nil).Obj()
|
||||
|
||||
claimParameters = st.MakeClaimParameters().Name(claimName).Namespace(namespace).
|
||||
NamedResourcesRequests("some-driver", "true").
|
||||
GeneratedFrom(&resourceapi.ResourceClaimParametersReference{
|
||||
Name: claimName,
|
||||
Kind: "ResourceClaimParameters",
|
||||
APIGroup: "example.com",
|
||||
}).
|
||||
Obj()
|
||||
claimParametersOtherNamespace = st.MakeClaimParameters().Name(claimName).Namespace(namespace+"-2").
|
||||
NamedResourcesRequests("some-driver", "true").
|
||||
GeneratedFrom(&resourceapi.ResourceClaimParametersReference{
|
||||
Name: claimName,
|
||||
Kind: "ResourceClaimParameters",
|
||||
APIGroup: "example.com",
|
||||
}).
|
||||
Obj()
|
||||
classParameters = st.MakeClassParameters().Name(className).Namespace(namespace).
|
||||
NamedResourcesFilters("some-driver", "true").
|
||||
GeneratedFrom(&resourceapi.ResourceClassParametersReference{
|
||||
Name: className,
|
||||
Namespace: namespace,
|
||||
Kind: "ResourceClassParameters",
|
||||
APIGroup: "example.com",
|
||||
}).
|
||||
Obj()
|
||||
// Node with same device, but now with a "healthy" boolean attribute.
|
||||
workerNode2 = &st.MakeNode().Name(node2Name).Label("kubernetes.io/hostname", node2Name).Node
|
||||
workerNode2Slice = st.MakeResourceSlice(node2Name, driver).Device("instance-1", map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{attrName: {BoolValue: ptr.To(true)}}).Obj()
|
||||
|
||||
claim = st.MakeResourceClaim().
|
||||
// Yet another node, same as the second one.
|
||||
workerNode3 = &st.MakeNode().Name(node3Name).Label("kubernetes.io/hostname", node3Name).Node
|
||||
workerNode3Slice = st.MakeResourceSlice(node3Name, driver).Device("instance-1", map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{attrName: {BoolValue: ptr.To(true)}}).Obj()
|
||||
|
||||
brokenSelector = resourceapi.DeviceSelector{
|
||||
CEL: &resourceapi.CELDeviceSelector{
|
||||
// Not set for workerNode.
|
||||
Expression: fmt.Sprintf(`device.attributes["%s"].%s`, driver, attrName),
|
||||
},
|
||||
}
|
||||
|
||||
claim = st.MakeResourceClaim(controller).
|
||||
Name(claimName).
|
||||
Namespace(namespace).
|
||||
ResourceClassName(className).
|
||||
Request(className).
|
||||
Obj()
|
||||
pendingClaim = st.FromResourceClaim(claim).
|
||||
OwnerReference(podName, podUID, podKind).
|
||||
@@ -164,44 +126,53 @@ var (
|
||||
pendingClaim2 = st.FromResourceClaim(pendingClaim).
|
||||
Name(claimName2).
|
||||
Obj()
|
||||
allocationResult = &resourceapi.AllocationResult{
|
||||
Controller: controller,
|
||||
Devices: resourceapi.DeviceAllocationResult{
|
||||
Results: []resourceapi.DeviceRequestAllocationResult{{
|
||||
Driver: driver,
|
||||
Pool: nodeName,
|
||||
Device: "instance-1",
|
||||
Request: "req-1",
|
||||
}},
|
||||
},
|
||||
NodeSelector: func() *v1.NodeSelector {
|
||||
// Label selector...
|
||||
nodeSelector := st.MakeNodeSelector().In("metadata.name", []string{nodeName}).Obj()
|
||||
// ... but we need a field selector, so let's swap.
|
||||
nodeSelector.NodeSelectorTerms[0].MatchExpressions, nodeSelector.NodeSelectorTerms[0].MatchFields = nodeSelector.NodeSelectorTerms[0].MatchFields, nodeSelector.NodeSelectorTerms[0].MatchExpressions
|
||||
return nodeSelector
|
||||
}(),
|
||||
}
|
||||
deallocatingClaim = st.FromResourceClaim(pendingClaim).
|
||||
Allocation("some-driver", &resourceapi.AllocationResult{}).
|
||||
Allocation(allocationResult).
|
||||
DeallocationRequested(true).
|
||||
Obj()
|
||||
inUseClaim = st.FromResourceClaim(pendingClaim).
|
||||
Allocation("some-driver", &resourceapi.AllocationResult{}).
|
||||
Allocation(allocationResult).
|
||||
ReservedForPod(podName, types.UID(podUID)).
|
||||
Obj()
|
||||
structuredInUseClaim = st.FromResourceClaim(inUseClaim).
|
||||
Structured("worker", "instance-1").
|
||||
Structured().
|
||||
Obj()
|
||||
allocatedClaim = st.FromResourceClaim(pendingClaim).
|
||||
Allocation("some-driver", &resourceapi.AllocationResult{}).
|
||||
Allocation(allocationResult).
|
||||
Obj()
|
||||
|
||||
pendingClaimWithParams = st.FromResourceClaim(pendingClaim).ParametersRef(claimName).Obj()
|
||||
structuredAllocatedClaim = st.FromResourceClaim(allocatedClaim).Structured("worker", "instance-1").Obj()
|
||||
structuredAllocatedClaimWithParams = st.FromResourceClaim(structuredAllocatedClaim).ParametersRef(claimName).Obj()
|
||||
|
||||
otherStructuredAllocatedClaim = st.FromResourceClaim(structuredAllocatedClaim).Name(structuredAllocatedClaim.Name + "-other").Obj()
|
||||
|
||||
allocatedClaimWithWrongTopology = st.FromResourceClaim(allocatedClaim).
|
||||
Allocation("some-driver", &resourceapi.AllocationResult{AvailableOnNodes: st.MakeNodeSelector().In("no-such-label", []string{"no-such-value"}).Obj()}).
|
||||
Allocation(&resourceapi.AllocationResult{Controller: controller, NodeSelector: st.MakeNodeSelector().In("no-such-label", []string{"no-such-value"}).Obj()}).
|
||||
Obj()
|
||||
structuredAllocatedClaimWithWrongTopology = st.FromResourceClaim(allocatedClaimWithWrongTopology).
|
||||
Structured("worker-2", "instance-1").
|
||||
Obj()
|
||||
allocatedClaimWithGoodTopology = st.FromResourceClaim(allocatedClaim).
|
||||
Allocation("some-driver", &resourceapi.AllocationResult{AvailableOnNodes: st.MakeNodeSelector().In("kubernetes.io/hostname", []string{"worker"}).Obj()}).
|
||||
Allocation(&resourceapi.AllocationResult{Controller: controller, NodeSelector: st.MakeNodeSelector().In("kubernetes.io/hostname", []string{nodeName}).Obj()}).
|
||||
Obj()
|
||||
structuredAllocatedClaimWithGoodTopology = st.FromResourceClaim(allocatedClaimWithGoodTopology).
|
||||
Structured("worker", "instance-1").
|
||||
Obj()
|
||||
otherClaim = st.MakeResourceClaim().
|
||||
otherClaim = st.MakeResourceClaim(controller).
|
||||
Name("not-my-claim").
|
||||
Namespace(namespace).
|
||||
ResourceClassName(className).
|
||||
Request(className).
|
||||
Obj()
|
||||
otherAllocatedClaim = st.FromResourceClaim(otherClaim).
|
||||
Allocation(allocationResult).
|
||||
Obj()
|
||||
|
||||
scheduling = st.MakePodSchedulingContexts().Name(podName).Namespace(namespace).
|
||||
OwnerReference(podName, podUID, podKind).
|
||||
@@ -224,38 +195,37 @@ func reserve(claim *resourceapi.ResourceClaim, pod *v1.Pod) *resourceapi.Resourc
|
||||
Obj()
|
||||
}
|
||||
|
||||
// claimWithCRD replaces the in-tree group with "example.com".
|
||||
func claimWithCRD(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
||||
func structuredClaim(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
||||
return st.FromResourceClaim(claim).
|
||||
Structured().
|
||||
Obj()
|
||||
}
|
||||
|
||||
func breakCELInClaim(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
||||
claim = claim.DeepCopy()
|
||||
claim.Spec.ParametersRef.APIGroup = "example.com"
|
||||
for i := range claim.Spec.Devices.Requests {
|
||||
for e := range claim.Spec.Devices.Requests[i].Selectors {
|
||||
claim.Spec.Devices.Requests[i].Selectors[e] = brokenSelector
|
||||
}
|
||||
if len(claim.Spec.Devices.Requests[i].Selectors) == 0 {
|
||||
claim.Spec.Devices.Requests[i].Selectors = []resourceapi.DeviceSelector{brokenSelector}
|
||||
}
|
||||
}
|
||||
return claim
|
||||
}
|
||||
|
||||
// classWithCRD replaces the in-tree group with "example.com".
|
||||
func classWithCRD(class *resourceapi.ResourceClass) *resourceapi.ResourceClass {
|
||||
func breakCELInClass(class *resourceapi.DeviceClass) *resourceapi.DeviceClass {
|
||||
class = class.DeepCopy()
|
||||
class.ParametersRef.APIGroup = "example.com"
|
||||
for i := range class.Spec.Selectors {
|
||||
class.Spec.Selectors[i] = brokenSelector
|
||||
}
|
||||
if len(class.Spec.Selectors) == 0 {
|
||||
class.Spec.Selectors = []resourceapi.DeviceSelector{brokenSelector}
|
||||
}
|
||||
|
||||
return class
|
||||
}
|
||||
|
||||
func breakCELInClaimParameters(parameters *resourceapi.ResourceClaimParameters) *resourceapi.ResourceClaimParameters {
|
||||
parameters = parameters.DeepCopy()
|
||||
for i := range parameters.DriverRequests {
|
||||
for e := range parameters.DriverRequests[i].Requests {
|
||||
parameters.DriverRequests[i].Requests[e].NamedResources.Selector = `attributes.bool["no-such-attribute"]`
|
||||
}
|
||||
}
|
||||
return parameters
|
||||
}
|
||||
|
||||
func breakCELInClassParameters(parameters *resourceapi.ResourceClassParameters) *resourceapi.ResourceClassParameters {
|
||||
parameters = parameters.DeepCopy()
|
||||
for i := range parameters.Filters {
|
||||
parameters.Filters[i].NamedResources.Selector = `attributes.bool["no-such-attribute"]`
|
||||
}
|
||||
return parameters
|
||||
}
|
||||
|
||||
// result defines the expected outcome of some operation. It covers
|
||||
// operation's status and the state of the world (= objects).
|
||||
type result struct {
|
||||
@@ -337,7 +307,7 @@ func TestPlugin(t *testing.T) {
|
||||
nodes []*v1.Node // default if unset is workerNode
|
||||
pod *v1.Pod
|
||||
claims []*resourceapi.ResourceClaim
|
||||
classes []*resourceapi.ResourceClass
|
||||
classes []*resourceapi.DeviceClass
|
||||
schedulings []*resourceapi.PodSchedulingContext
|
||||
|
||||
// objs get stored directly in the fake client, without passing
|
||||
@@ -378,7 +348,7 @@ func TestPlugin(t *testing.T) {
|
||||
},
|
||||
"claim-reference-structured": {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{structuredAllocatedClaim, otherClaim},
|
||||
claims: []*resourceapi.ResourceClaim{structuredClaim(allocatedClaim), otherClaim},
|
||||
want: want{
|
||||
prebind: result{
|
||||
changes: change{
|
||||
@@ -412,7 +382,7 @@ func TestPlugin(t *testing.T) {
|
||||
},
|
||||
"claim-template-structured": {
|
||||
pod: podWithClaimTemplateInStatus,
|
||||
claims: []*resourceapi.ResourceClaim{structuredAllocatedClaim, otherClaim},
|
||||
claims: []*resourceapi.ResourceClaim{structuredClaim(allocatedClaim), otherClaim},
|
||||
want: want{
|
||||
prebind: result{
|
||||
changes: change{
|
||||
@@ -464,12 +434,12 @@ func TestPlugin(t *testing.T) {
|
||||
},
|
||||
"structured-no-resources": {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
||||
classes: []*resourceapi.ResourceClass{structuredResourceClass},
|
||||
claims: []*resourceapi.ResourceClaim{structuredClaim(pendingClaim)},
|
||||
classes: []*resourceapi.DeviceClass{deviceClass},
|
||||
want: want{
|
||||
filter: perNodeResult{
|
||||
workerNode.Name: {
|
||||
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `resourceclaim cannot be allocated for the node (unsuitable)`),
|
||||
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `cannot allocate all claims`),
|
||||
},
|
||||
},
|
||||
postfilter: result{
|
||||
@@ -479,28 +449,28 @@ func TestPlugin(t *testing.T) {
|
||||
},
|
||||
"structured-with-resources": {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
||||
classes: []*resourceapi.ResourceClass{structuredResourceClass},
|
||||
claims: []*resourceapi.ResourceClaim{structuredClaim(pendingClaim)},
|
||||
classes: []*resourceapi.DeviceClass{deviceClass},
|
||||
objs: []apiruntime.Object{workerNodeSlice},
|
||||
want: want{
|
||||
reserve: result{
|
||||
inFlightClaim: structuredAllocatedClaim,
|
||||
inFlightClaim: structuredClaim(allocatedClaim),
|
||||
},
|
||||
prebind: result{
|
||||
assumedClaim: reserve(structuredAllocatedClaim, podWithClaimName),
|
||||
assumedClaim: reserve(structuredClaim(allocatedClaim), podWithClaimName),
|
||||
changes: change{
|
||||
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
||||
if claim.Name == claimName {
|
||||
claim = claim.DeepCopy()
|
||||
claim.Finalizers = structuredAllocatedClaim.Finalizers
|
||||
claim.Status = structuredInUseClaim.Status
|
||||
claim.Finalizers = structuredClaim(allocatedClaim).Finalizers
|
||||
claim.Status = structuredClaim(inUseClaim).Status
|
||||
}
|
||||
return claim
|
||||
},
|
||||
},
|
||||
},
|
||||
postbind: result{
|
||||
assumedClaim: reserve(structuredAllocatedClaim, podWithClaimName),
|
||||
assumedClaim: reserve(structuredClaim(allocatedClaim), podWithClaimName),
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -509,18 +479,18 @@ func TestPlugin(t *testing.T) {
|
||||
// the scheduler got interrupted.
|
||||
pod: podWithClaimName,
|
||||
claims: func() []*resourceapi.ResourceClaim {
|
||||
claim := pendingClaim.DeepCopy()
|
||||
claim.Finalizers = structuredAllocatedClaim.Finalizers
|
||||
claim := structuredClaim(pendingClaim)
|
||||
claim.Finalizers = structuredClaim(allocatedClaim).Finalizers
|
||||
return []*resourceapi.ResourceClaim{claim}
|
||||
}(),
|
||||
classes: []*resourceapi.ResourceClass{structuredResourceClass},
|
||||
classes: []*resourceapi.DeviceClass{deviceClass},
|
||||
objs: []apiruntime.Object{workerNodeSlice},
|
||||
want: want{
|
||||
reserve: result{
|
||||
inFlightClaim: structuredAllocatedClaim,
|
||||
inFlightClaim: structuredClaim(allocatedClaim),
|
||||
},
|
||||
prebind: result{
|
||||
assumedClaim: reserve(structuredAllocatedClaim, podWithClaimName),
|
||||
assumedClaim: reserve(structuredClaim(allocatedClaim), podWithClaimName),
|
||||
changes: change{
|
||||
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
||||
if claim.Name == claimName {
|
||||
@@ -532,7 +502,7 @@ func TestPlugin(t *testing.T) {
|
||||
},
|
||||
},
|
||||
postbind: result{
|
||||
assumedClaim: reserve(structuredAllocatedClaim, podWithClaimName),
|
||||
assumedClaim: reserve(structuredClaim(allocatedClaim), podWithClaimName),
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -541,11 +511,11 @@ func TestPlugin(t *testing.T) {
|
||||
// removed before the scheduler reaches PreBind.
|
||||
pod: podWithClaimName,
|
||||
claims: func() []*resourceapi.ResourceClaim {
|
||||
claim := pendingClaim.DeepCopy()
|
||||
claim.Finalizers = structuredAllocatedClaim.Finalizers
|
||||
claim := structuredClaim(pendingClaim)
|
||||
claim.Finalizers = structuredClaim(allocatedClaim).Finalizers
|
||||
return []*resourceapi.ResourceClaim{claim}
|
||||
}(),
|
||||
classes: []*resourceapi.ResourceClass{structuredResourceClass},
|
||||
classes: []*resourceapi.DeviceClass{deviceClass},
|
||||
objs: []apiruntime.Object{workerNodeSlice},
|
||||
prepare: prepare{
|
||||
prebind: change{
|
||||
@@ -557,15 +527,15 @@ func TestPlugin(t *testing.T) {
|
||||
},
|
||||
want: want{
|
||||
reserve: result{
|
||||
inFlightClaim: structuredAllocatedClaim,
|
||||
inFlightClaim: structuredClaim(allocatedClaim),
|
||||
},
|
||||
prebind: result{
|
||||
assumedClaim: reserve(structuredAllocatedClaim, podWithClaimName),
|
||||
assumedClaim: reserve(structuredClaim(allocatedClaim), podWithClaimName),
|
||||
changes: change{
|
||||
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
||||
if claim.Name == claimName {
|
||||
claim = claim.DeepCopy()
|
||||
claim.Finalizers = structuredAllocatedClaim.Finalizers
|
||||
claim.Finalizers = structuredClaim(allocatedClaim).Finalizers
|
||||
claim.Status = structuredInUseClaim.Status
|
||||
}
|
||||
return claim
|
||||
@@ -573,7 +543,7 @@ func TestPlugin(t *testing.T) {
|
||||
},
|
||||
},
|
||||
postbind: result{
|
||||
assumedClaim: reserve(structuredAllocatedClaim, podWithClaimName),
|
||||
assumedClaim: reserve(structuredClaim(allocatedClaim), podWithClaimName),
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -581,23 +551,23 @@ func TestPlugin(t *testing.T) {
|
||||
// No finalizer initially, then it gets added before
|
||||
// the scheduler reaches PreBind. Shouldn't happen?
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
||||
classes: []*resourceapi.ResourceClass{structuredResourceClass},
|
||||
claims: []*resourceapi.ResourceClaim{structuredClaim(pendingClaim)},
|
||||
classes: []*resourceapi.DeviceClass{deviceClass},
|
||||
objs: []apiruntime.Object{workerNodeSlice},
|
||||
prepare: prepare{
|
||||
prebind: change{
|
||||
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
||||
claim.Finalizers = structuredAllocatedClaim.Finalizers
|
||||
claim.Finalizers = structuredClaim(allocatedClaim).Finalizers
|
||||
return claim
|
||||
},
|
||||
},
|
||||
},
|
||||
want: want{
|
||||
reserve: result{
|
||||
inFlightClaim: structuredAllocatedClaim,
|
||||
inFlightClaim: structuredClaim(allocatedClaim),
|
||||
},
|
||||
prebind: result{
|
||||
assumedClaim: reserve(structuredAllocatedClaim, podWithClaimName),
|
||||
assumedClaim: reserve(structuredClaim(allocatedClaim), podWithClaimName),
|
||||
changes: change{
|
||||
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
||||
if claim.Name == claimName {
|
||||
@@ -609,31 +579,31 @@ func TestPlugin(t *testing.T) {
|
||||
},
|
||||
},
|
||||
postbind: result{
|
||||
assumedClaim: reserve(structuredAllocatedClaim, podWithClaimName),
|
||||
assumedClaim: reserve(structuredClaim(allocatedClaim), podWithClaimName),
|
||||
},
|
||||
},
|
||||
},
|
||||
"structured-skip-bind": {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
||||
classes: []*resourceapi.ResourceClass{structuredResourceClass},
|
||||
claims: []*resourceapi.ResourceClaim{structuredClaim(pendingClaim)},
|
||||
classes: []*resourceapi.DeviceClass{deviceClass},
|
||||
objs: []apiruntime.Object{workerNodeSlice},
|
||||
want: want{
|
||||
reserve: result{
|
||||
inFlightClaim: structuredAllocatedClaim,
|
||||
inFlightClaim: structuredClaim(allocatedClaim),
|
||||
},
|
||||
unreserveBeforePreBind: &result{},
|
||||
},
|
||||
},
|
||||
"structured-exhausted-resources": {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{pendingClaim, otherStructuredAllocatedClaim},
|
||||
classes: []*resourceapi.ResourceClass{structuredResourceClass},
|
||||
claims: []*resourceapi.ResourceClaim{structuredClaim(pendingClaim), structuredClaim(otherAllocatedClaim)},
|
||||
classes: []*resourceapi.DeviceClass{deviceClass},
|
||||
objs: []apiruntime.Object{workerNodeSlice},
|
||||
want: want{
|
||||
filter: perNodeResult{
|
||||
workerNode.Name: {
|
||||
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `resourceclaim cannot be allocated for the node (unsuitable)`),
|
||||
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `cannot allocate all claims`),
|
||||
},
|
||||
},
|
||||
postfilter: result{
|
||||
@@ -642,182 +612,70 @@ func TestPlugin(t *testing.T) {
|
||||
},
|
||||
},
|
||||
|
||||
"with-parameters": {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{pendingClaimWithParams},
|
||||
classes: []*resourceapi.ResourceClass{structuredResourceClassWithParams},
|
||||
objs: []apiruntime.Object{claimParameters, classParameters, workerNodeSlice},
|
||||
want: want{
|
||||
reserve: result{
|
||||
inFlightClaim: structuredAllocatedClaimWithParams,
|
||||
},
|
||||
prebind: result{
|
||||
assumedClaim: reserve(structuredAllocatedClaimWithParams, podWithClaimName),
|
||||
changes: change{
|
||||
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
||||
if claim.Name == claimName {
|
||||
claim = claim.DeepCopy()
|
||||
claim.Finalizers = structuredAllocatedClaim.Finalizers
|
||||
claim.Status = structuredInUseClaim.Status
|
||||
}
|
||||
return claim
|
||||
},
|
||||
},
|
||||
},
|
||||
postbind: result{
|
||||
assumedClaim: reserve(structuredAllocatedClaimWithParams, podWithClaimName),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
"with-translated-parameters": {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{claimWithCRD(pendingClaimWithParams)},
|
||||
classes: []*resourceapi.ResourceClass{classWithCRD(structuredResourceClassWithCRD)},
|
||||
objs: []apiruntime.Object{claimParameters, claimParametersOtherNamespace /* must be ignored */, classParameters, workerNodeSlice},
|
||||
want: want{
|
||||
reserve: result{
|
||||
inFlightClaim: claimWithCRD(structuredAllocatedClaimWithParams),
|
||||
},
|
||||
prebind: result{
|
||||
assumedClaim: reserve(claimWithCRD(structuredAllocatedClaimWithParams), podWithClaimName),
|
||||
changes: change{
|
||||
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
||||
if claim.Name == claimName {
|
||||
claim = claim.DeepCopy()
|
||||
claim.Finalizers = structuredAllocatedClaim.Finalizers
|
||||
claim.Status = structuredInUseClaim.Status
|
||||
}
|
||||
return claim
|
||||
},
|
||||
},
|
||||
},
|
||||
postbind: result{
|
||||
assumedClaim: reserve(claimWithCRD(structuredAllocatedClaimWithParams), podWithClaimName),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
"missing-class-parameters": {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{pendingClaimWithParams},
|
||||
classes: []*resourceapi.ResourceClass{structuredResourceClassWithParams},
|
||||
objs: []apiruntime.Object{claimParameters, workerNodeSlice},
|
||||
want: want{
|
||||
prefilter: result{
|
||||
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `class parameters default/my-resource-class not found`),
|
||||
},
|
||||
postfilter: result{
|
||||
status: framework.NewStatus(framework.Unschedulable, `no new claims to deallocate`),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
"missing-claim-parameters": {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{pendingClaimWithParams},
|
||||
classes: []*resourceapi.ResourceClass{structuredResourceClassWithParams},
|
||||
objs: []apiruntime.Object{classParameters, workerNodeSlice},
|
||||
want: want{
|
||||
prefilter: result{
|
||||
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `claim parameters default/my-pod-my-resource not found`),
|
||||
},
|
||||
postfilter: result{
|
||||
status: framework.NewStatus(framework.Unschedulable, `no new claims to deallocate`),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
"missing-translated-class-parameters": {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{claimWithCRD(pendingClaimWithParams)},
|
||||
classes: []*resourceapi.ResourceClass{classWithCRD(structuredResourceClassWithCRD)},
|
||||
objs: []apiruntime.Object{claimParameters, workerNodeSlice},
|
||||
want: want{
|
||||
prefilter: result{
|
||||
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `generated class parameters for ResourceClassParameters.example.com default/my-resource-class not found`),
|
||||
},
|
||||
postfilter: result{
|
||||
status: framework.NewStatus(framework.Unschedulable, `no new claims to deallocate`),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
"missing-translated-claim-parameters": {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{claimWithCRD(pendingClaimWithParams)},
|
||||
classes: []*resourceapi.ResourceClass{classWithCRD(structuredResourceClassWithCRD)},
|
||||
objs: []apiruntime.Object{classParameters, workerNodeSlice},
|
||||
want: want{
|
||||
prefilter: result{
|
||||
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `generated claim parameters for ResourceClaimParameters.example.com default/my-pod-my-resource not found`),
|
||||
},
|
||||
postfilter: result{
|
||||
status: framework.NewStatus(framework.Unschedulable, `no new claims to deallocate`),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
"too-many-translated-class-parameters": {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{claimWithCRD(pendingClaimWithParams)},
|
||||
classes: []*resourceapi.ResourceClass{classWithCRD(structuredResourceClassWithCRD)},
|
||||
objs: []apiruntime.Object{claimParameters, classParameters, st.FromClassParameters(classParameters).Name("other").Obj() /* too many */, workerNodeSlice},
|
||||
want: want{
|
||||
prefilter: result{
|
||||
status: framework.AsStatus(errors.New(`multiple generated class parameters for ResourceClassParameters.example.com my-resource-class found: [default/my-resource-class default/other]`)),
|
||||
},
|
||||
postfilter: result{
|
||||
status: framework.NewStatus(framework.Unschedulable, `no new claims to deallocate`),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
"too-many-translated-claim-parameters": {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{claimWithCRD(pendingClaimWithParams)},
|
||||
classes: []*resourceapi.ResourceClass{classWithCRD(structuredResourceClassWithCRD)},
|
||||
objs: []apiruntime.Object{claimParameters, st.FromClaimParameters(claimParameters).Name("other").Obj() /* too many */, classParameters, workerNodeSlice},
|
||||
want: want{
|
||||
prefilter: result{
|
||||
status: framework.AsStatus(errors.New(`multiple generated claim parameters for ResourceClaimParameters.example.com default/my-pod-my-resource found: [default/my-pod-my-resource default/other]`)),
|
||||
},
|
||||
postfilter: result{
|
||||
status: framework.NewStatus(framework.Unschedulable, `no new claims to deallocate`),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
"claim-parameters-CEL-runtime-error": {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{pendingClaimWithParams},
|
||||
classes: []*resourceapi.ResourceClass{structuredResourceClassWithParams},
|
||||
objs: []apiruntime.Object{breakCELInClaimParameters(claimParameters), classParameters, workerNodeSlice},
|
||||
claims: []*resourceapi.ResourceClaim{breakCELInClaim(structuredClaim(pendingClaim))},
|
||||
classes: []*resourceapi.DeviceClass{deviceClass},
|
||||
objs: []apiruntime.Object{workerNodeSlice},
|
||||
want: want{
|
||||
filter: perNodeResult{
|
||||
workerNode.Name: {
|
||||
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `checking structured parameters failed: checking node "worker" and resources of driver "some-driver": evaluate request CEL expression: no such key: no-such-attribute`),
|
||||
status: framework.AsStatus(errors.New(`claim default/my-pod-my-resource: selector #0: CEL runtime error: no such key: ` + string(attrName))),
|
||||
},
|
||||
},
|
||||
postfilter: result{
|
||||
status: framework.NewStatus(framework.Unschedulable, `still not schedulable`),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
"class-parameters-CEL-runtime-error": {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{pendingClaimWithParams},
|
||||
classes: []*resourceapi.ResourceClass{structuredResourceClassWithParams},
|
||||
objs: []apiruntime.Object{claimParameters, breakCELInClassParameters(classParameters), workerNodeSlice},
|
||||
claims: []*resourceapi.ResourceClaim{structuredClaim(pendingClaim)},
|
||||
classes: []*resourceapi.DeviceClass{breakCELInClass(deviceClass)},
|
||||
objs: []apiruntime.Object{workerNodeSlice},
|
||||
want: want{
|
||||
filter: perNodeResult{
|
||||
workerNode.Name: {
|
||||
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `checking structured parameters failed: checking node "worker" and resources of driver "some-driver": evaluate filter CEL expression: no such key: no-such-attribute`),
|
||||
status: framework.AsStatus(errors.New(`class my-resource-class: selector #0: CEL runtime error: no such key: ` + string(attrName))),
|
||||
},
|
||||
},
|
||||
postfilter: result{
|
||||
status: framework.NewStatus(framework.Unschedulable, `still not schedulable`),
|
||||
},
|
||||
},
|
||||
|
||||
// When pod scheduling encounters CEL runtime errors for some nodes, but not all,
|
||||
// it should still not schedule the pod because there is something wrong with it.
|
||||
// Scheduling it would make it harder to detect that there is a problem.
|
||||
//
|
||||
// This matches the "keeps pod pending because of CEL runtime errors" E2E test.
|
||||
"CEL-runtime-error-for-one-of-two-nodes": {
|
||||
nodes: []*v1.Node{workerNode, workerNode2},
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{breakCELInClaim(structuredClaim(pendingClaim))},
|
||||
classes: []*resourceapi.DeviceClass{deviceClass},
|
||||
objs: []apiruntime.Object{workerNodeSlice, workerNode2Slice},
|
||||
want: want{
|
||||
filter: perNodeResult{
|
||||
workerNode.Name: {
|
||||
status: framework.AsStatus(errors.New(`claim default/my-pod-my-resource: selector #0: CEL runtime error: no such key: ` + string(attrName))),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
// When two nodes were found, PreScore gets called.
|
||||
"CEL-runtime-error-for-one-of-three-nodes": {
|
||||
nodes: []*v1.Node{workerNode, workerNode2, workerNode3},
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{breakCELInClaim(structuredClaim(pendingClaim))},
|
||||
classes: []*resourceapi.DeviceClass{deviceClass},
|
||||
objs: []apiruntime.Object{workerNodeSlice, workerNode2Slice, workerNode3Slice},
|
||||
want: want{
|
||||
filter: perNodeResult{
|
||||
workerNode.Name: {
|
||||
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `claim default/my-pod-my-resource: selector #0: CEL runtime error: no such key: `+string(attrName)),
|
||||
},
|
||||
},
|
||||
prescore: result{
|
||||
// This is the error found during Filter.
|
||||
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, `filter node worker: claim default/my-pod-my-resource: selector #0: CEL runtime error: no such key: healthy`),
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -839,7 +697,7 @@ func TestPlugin(t *testing.T) {
|
||||
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
||||
want: want{
|
||||
prefilter: result{
|
||||
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, fmt.Sprintf("resource class %s does not exist", className)),
|
||||
status: framework.NewStatus(framework.UnschedulableAndUnresolvable, fmt.Sprintf("request req-1: device class %s does not exist", className)),
|
||||
},
|
||||
postfilter: result{
|
||||
status: framework.NewStatus(framework.Unschedulable, `no new claims to deallocate`),
|
||||
@@ -851,7 +709,7 @@ func TestPlugin(t *testing.T) {
|
||||
// and select a node.
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
||||
classes: []*resourceapi.ResourceClass{resourceClass},
|
||||
classes: []*resourceapi.DeviceClass{deviceClass},
|
||||
want: want{
|
||||
prebind: result{
|
||||
status: framework.NewStatus(framework.Pending, `waiting for resource driver`),
|
||||
@@ -865,7 +723,7 @@ func TestPlugin(t *testing.T) {
|
||||
// there are multiple claims.
|
||||
pod: podWithTwoClaimNames,
|
||||
claims: []*resourceapi.ResourceClaim{pendingClaim, pendingClaim2},
|
||||
classes: []*resourceapi.ResourceClass{resourceClass},
|
||||
classes: []*resourceapi.DeviceClass{deviceClass},
|
||||
want: want{
|
||||
prebind: result{
|
||||
status: framework.NewStatus(framework.Pending, `waiting for resource driver`),
|
||||
@@ -879,7 +737,7 @@ func TestPlugin(t *testing.T) {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
||||
schedulings: []*resourceapi.PodSchedulingContext{schedulingInfo},
|
||||
classes: []*resourceapi.ResourceClass{resourceClass},
|
||||
classes: []*resourceapi.DeviceClass{deviceClass},
|
||||
want: want{
|
||||
prebind: result{
|
||||
status: framework.NewStatus(framework.Pending, `waiting for resource driver`),
|
||||
@@ -899,7 +757,7 @@ func TestPlugin(t *testing.T) {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
||||
schedulings: []*resourceapi.PodSchedulingContext{schedulingInfo},
|
||||
classes: []*resourceapi.ResourceClass{resourceClass},
|
||||
classes: []*resourceapi.DeviceClass{deviceClass},
|
||||
prepare: prepare{
|
||||
prebind: change{
|
||||
scheduling: func(in *resourceapi.PodSchedulingContext) *resourceapi.PodSchedulingContext {
|
||||
@@ -923,7 +781,7 @@ func TestPlugin(t *testing.T) {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{allocatedClaim},
|
||||
schedulings: []*resourceapi.PodSchedulingContext{schedulingInfo},
|
||||
classes: []*resourceapi.ResourceClass{resourceClass},
|
||||
classes: []*resourceapi.DeviceClass{deviceClass},
|
||||
want: want{
|
||||
prebind: result{
|
||||
changes: change{
|
||||
@@ -967,7 +825,7 @@ func TestPlugin(t *testing.T) {
|
||||
// PostFilter tries to get the pod schedulable by
|
||||
// deallocating the claim.
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{structuredAllocatedClaimWithWrongTopology},
|
||||
claims: []*resourceapi.ResourceClaim{structuredClaim(allocatedClaimWithWrongTopology)},
|
||||
want: want{
|
||||
filter: perNodeResult{
|
||||
workerNode.Name: {
|
||||
@@ -979,7 +837,7 @@ func TestPlugin(t *testing.T) {
|
||||
changes: change{
|
||||
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
||||
return st.FromResourceClaim(in).
|
||||
Allocation("", nil).
|
||||
Allocation(nil).
|
||||
Obj()
|
||||
},
|
||||
},
|
||||
@@ -1028,7 +886,7 @@ func TestPlugin(t *testing.T) {
|
||||
},
|
||||
"bind-failure-structured": {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{structuredAllocatedClaimWithGoodTopology},
|
||||
claims: []*resourceapi.ResourceClaim{structuredClaim(allocatedClaimWithGoodTopology)},
|
||||
want: want{
|
||||
prebind: result{
|
||||
changes: change{
|
||||
@@ -1109,15 +967,20 @@ func TestPlugin(t *testing.T) {
|
||||
t.Run(fmt.Sprintf("filter/%s", nodeInfo.Node().Name), func(t *testing.T) {
|
||||
testCtx.verify(t, tc.want.filter.forNode(nodeName), initialObjects, nil, status)
|
||||
})
|
||||
if status.Code() != framework.Success {
|
||||
unschedulable = true
|
||||
} else {
|
||||
if status.Code() == framework.Success {
|
||||
potentialNodes = append(potentialNodes, nodeInfo)
|
||||
}
|
||||
if status.Code() == framework.Error {
|
||||
// An error aborts scheduling.
|
||||
return
|
||||
}
|
||||
}
|
||||
if len(potentialNodes) == 0 {
|
||||
unschedulable = true
|
||||
}
|
||||
}
|
||||
|
||||
if !unschedulable && len(potentialNodes) > 0 {
|
||||
if !unschedulable && len(potentialNodes) > 1 {
|
||||
initialObjects = testCtx.listAll(t)
|
||||
initialObjects = testCtx.updateAPIServer(t, initialObjects, tc.prepare.prescore)
|
||||
status := testCtx.p.PreScore(testCtx.ctx, testCtx.state, tc.pod, potentialNodes)
|
||||
@@ -1184,7 +1047,7 @@ func TestPlugin(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
} else {
|
||||
} else if len(potentialNodes) == 0 {
|
||||
initialObjects = testCtx.listAll(t)
|
||||
initialObjects = testCtx.updateAPIServer(t, initialObjects, tc.prepare.postfilter)
|
||||
result, status := testCtx.p.PostFilter(testCtx.ctx, testCtx.state, tc.pod, nil /* filteredNodeStatusMap not used by plugin */)
|
||||
@@ -1209,7 +1072,12 @@ type testContext struct {
|
||||
|
||||
func (tc *testContext) verify(t *testing.T, expected result, initialObjects []metav1.Object, result interface{}, status *framework.Status) {
|
||||
t.Helper()
|
||||
assert.Equal(t, expected.status, status)
|
||||
if expectedErr := status.AsError(); expectedErr != nil {
|
||||
// Compare only the error strings.
|
||||
assert.ErrorContains(t, status.AsError(), expectedErr.Error())
|
||||
} else {
|
||||
assert.Equal(t, expected.status, status)
|
||||
}
|
||||
objects := tc.listAll(t)
|
||||
wantObjects := update(t, initialObjects, expected.changes)
|
||||
wantObjects = append(wantObjects, expected.added...)
|
||||
@@ -1351,7 +1219,7 @@ func update(t *testing.T, objects []metav1.Object, updates change) []metav1.Obje
|
||||
return updated
|
||||
}
|
||||
|
||||
func setup(t *testing.T, nodes []*v1.Node, claims []*resourceapi.ResourceClaim, classes []*resourceapi.ResourceClass, schedulings []*resourceapi.PodSchedulingContext, objs []apiruntime.Object) (result *testContext) {
|
||||
func setup(t *testing.T, nodes []*v1.Node, claims []*resourceapi.ResourceClaim, classes []*resourceapi.DeviceClass, schedulings []*resourceapi.PodSchedulingContext, objs []apiruntime.Object) (result *testContext) {
|
||||
t.Helper()
|
||||
|
||||
tc := &testContext{}
|
||||
@@ -1387,7 +1255,7 @@ func setup(t *testing.T, nodes []*v1.Node, claims []*resourceapi.ResourceClaim,
|
||||
require.NoError(t, err, "create resource claim")
|
||||
}
|
||||
for _, class := range classes {
|
||||
_, err := tc.client.ResourceV1alpha3().ResourceClasses().Create(tc.ctx, class, metav1.CreateOptions{})
|
||||
_, err := tc.client.ResourceV1alpha3().DeviceClasses().Create(tc.ctx, class, metav1.CreateOptions{})
|
||||
require.NoError(t, err, "create resource class")
|
||||
}
|
||||
for _, scheduling := range schedulings {
|
||||
@@ -1552,10 +1420,10 @@ func Test_isSchedulableAfterClaimChange(t *testing.T) {
|
||||
},
|
||||
"structured-claim-deallocate": {
|
||||
pod: podWithClaimName,
|
||||
claims: []*resourceapi.ResourceClaim{pendingClaim, otherStructuredAllocatedClaim},
|
||||
oldObj: otherStructuredAllocatedClaim,
|
||||
claims: []*resourceapi.ResourceClaim{pendingClaim, structuredClaim(otherAllocatedClaim)},
|
||||
oldObj: structuredClaim(otherAllocatedClaim),
|
||||
newObj: func() *resourceapi.ResourceClaim {
|
||||
claim := otherStructuredAllocatedClaim.DeepCopy()
|
||||
claim := structuredClaim(otherAllocatedClaim).DeepCopy()
|
||||
claim.Status.Allocation = nil
|
||||
return claim
|
||||
}(),
|
||||