diff --git a/pkg/apis/core/validation/validation.go b/pkg/apis/core/validation/validation.go index e4f936e3449..d1866add3bd 100644 --- a/pkg/apis/core/validation/validation.go +++ b/pkg/apis/core/validation/validation.go @@ -5847,6 +5847,15 @@ func ValidatePodStatusUpdate(newPod, oldPod *core.Pod, opts PodValidationOptions } } + // Prevent setting NominatedNodeName on already bound pods + if utilfeature.DefaultFeatureGate.Enabled(features.ClearingNominatedNodeNameAfterBinding) && + oldPod.Spec.NodeName != "" && + newPod.Status.NominatedNodeName != "" && + newPod.Status.NominatedNodeName != oldPod.Status.NominatedNodeName { + allErrs = append(allErrs, field.Forbidden(fldPath.Child("nominatedNodeName"), + "may not be set on pods that are already bound to a node")) + } + if newPod.Status.ObservedGeneration < 0 { allErrs = append(allErrs, field.Invalid(fldPath.Child("observedGeneration"), newPod.Status.ObservedGeneration, "must be a non-negative integer")) } diff --git a/pkg/apis/core/validation/validation_test.go b/pkg/apis/core/validation/validation_test.go index ef9cdf42e03..1b7fb3cc3b5 100644 --- a/pkg/apis/core/validation/validation_test.go +++ b/pkg/apis/core/validation/validation_test.go @@ -14498,21 +14498,21 @@ func TestValidatePodUpdate(t *testing.T) { func TestValidatePodStatusUpdate(t *testing.T) { tests := []struct { - new core.Pod - old core.Pod - err string - test string + test string + new core.Pod + old core.Pod + err string + enableClearingNominatedNodeNameAfterBinding bool }{{ - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ ObservedGeneration: 1, }), ), - *podtest.MakePod("foo"), - "", - "set valid status.observedGeneration", + old: *podtest.MakePod("foo"), + test: "set valid status.observedGeneration", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ Conditions: []core.PodCondition{{ Type: core.PodScheduled, @@ -14521,20 +14521,19 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - *podtest.MakePod("foo"), - "", - "set valid condition.observedGeneration", + old: *podtest.MakePod("foo"), + test: "set valid condition.observedGeneration", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ ObservedGeneration: -1, }), ), - *podtest.MakePod("foo"), - "status.observedGeneration: Invalid value: -1: must be a non-negative integer", - "set invalid status.observedGeneration", + old: *podtest.MakePod("foo"), + err: "status.observedGeneration: Invalid value: -1: must be a non-negative integer", + test: "set invalid status.observedGeneration", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ Conditions: []core.PodCondition{{ Type: core.PodScheduled, @@ -14543,63 +14542,154 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - *podtest.MakePod("foo"), - "status.conditions[0].observedGeneration: Invalid value: -1: must be a non-negative integer", - "set invalid condition.observedGeneration", + old: *podtest.MakePod("foo"), + err: "status.conditions[0].observedGeneration: Invalid value: -1: must be a non-negative integer", + test: "set invalid condition.observedGeneration", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetNodeName("node1"), podtest.SetStatus(core.PodStatus{ NominatedNodeName: "node1", }), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetNodeName("node1"), podtest.SetStatus(core.PodStatus{}), ), - "", - "removed 
nominatedNodeName", + test: "add valid nominatedNodeName", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetNodeName("node1"), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetNodeName("node1"), podtest.SetStatus(core.PodStatus{ NominatedNodeName: "node1", }), ), - "", - "add valid nominatedNodeName", + test: "remove nominatedNodeName", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetNodeName("node1"), podtest.SetStatus(core.PodStatus{ NominatedNodeName: "Node1", }), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetNodeName("node1"), ), - "nominatedNodeName", - "Add invalid nominatedNodeName", + err: "nominatedNodeName", + test: "Add invalid nominatedNodeName", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetNodeName("node1"), podtest.SetStatus(core.PodStatus{ NominatedNodeName: "node1", }), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetNodeName("node1"), podtest.SetStatus(core.PodStatus{ NominatedNodeName: "node2", }), ), - "", - "Update nominatedNodeName", + test: "Update nominatedNodeName", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", + podtest.SetStatus(core.PodStatus{ + NominatedNodeName: "node1", + }), + ), + old: *podtest.MakePod("foo"), + enableClearingNominatedNodeNameAfterBinding: true, + test: "allow setting NominatedNodeName on unbound pod with feature enabled", + }, { + new: *podtest.MakePod("foo", + podtest.SetNodeName("node1"), + ), + old: *podtest.MakePod("foo", + podtest.SetNodeName("node1"), + podtest.SetStatus(core.PodStatus{ + NominatedNodeName: "node2", + }), + ), + enableClearingNominatedNodeNameAfterBinding: true, + test: "allow clearing NominatedNodeName on bound pod with feature enabled", + }, { + new: *podtest.MakePod("foo", + podtest.SetNodeName("node1"), + podtest.SetStatus(core.PodStatus{ + NominatedNodeName: "node2", + }), + ), + old: *podtest.MakePod("foo", + podtest.SetNodeName("node1"), + ), + err: "may not be set on pods that are already bound to a node", + enableClearingNominatedNodeNameAfterBinding: true, + test: "prevent setting NominatedNodeName on already bound pod with feature enabled", + }, { + new: *podtest.MakePod("foo", + podtest.SetNodeName("node1"), + podtest.SetStatus(core.PodStatus{ + NominatedNodeName: "node1", + }), + ), + old: *podtest.MakePod("foo", + podtest.SetNodeName("node1"), + ), + err: "may not be set on pods that are already bound to a node", + enableClearingNominatedNodeNameAfterBinding: true, + test: "prevent setting NominatedNodeName to same node on already bound pod with feature enabled", + }, { + new: *podtest.MakePod("foo", + podtest.SetNodeName("node1"), + podtest.SetStatus(core.PodStatus{ + NominatedNodeName: "node3", + }), + ), + old: *podtest.MakePod("foo", + podtest.SetNodeName("node1"), + podtest.SetStatus(core.PodStatus{ + NominatedNodeName: "node2", + }), + ), + err: "may not be set on pods that are already bound to a node", + enableClearingNominatedNodeNameAfterBinding: true, + test: "prevent updating NominatedNodeName on already bound pod with feature enabled", + }, { + new: *podtest.MakePod("foo", + podtest.SetNodeName("node1"), + podtest.SetStatus(core.PodStatus{ + NominatedNodeName: "node1", + }), + ), + old: *podtest.MakePod("foo", + podtest.SetNodeName("node1"), + podtest.SetStatus(core.PodStatus{ + NominatedNodeName: "node2", + }), + ), + err: "may not be set on pods that are already bound to a node", + enableClearingNominatedNodeNameAfterBinding: 
true, + test: "prevent updating NominatedNodeName to same node on already bound pod with feature enabled", + }, { + new: *podtest.MakePod("foo", + podtest.SetNodeName("node1"), + podtest.SetStatus(core.PodStatus{ + NominatedNodeName: "node2", + }), + ), + old: *podtest.MakePod("foo", + podtest.SetNodeName("node1"), + podtest.SetStatus(core.PodStatus{ + NominatedNodeName: "node2", + }), + ), + enableClearingNominatedNodeNameAfterBinding: true, + test: "allow keeping same NominatedNodeName on bound pod with feature enabled", + }, { + new: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ InitContainerStatuses: []core.ContainerStatus{{ ContainerID: "docker://numbers", @@ -14627,11 +14717,10 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - *podtest.MakePod("foo"), - "", - "Container statuses pending", + old: *podtest.MakePod("foo"), + test: "Container statuses pending", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ InitContainerStatuses: []core.ContainerStatus{{ ContainerID: "docker://numbers", @@ -14661,7 +14750,7 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ InitContainerStatuses: []core.ContainerStatus{{ ContainerID: "docker://numbers", @@ -14688,10 +14777,9 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - "", - "Container statuses running", + test: "Container statuses running", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ ContainerStatuses: []core.ContainerStatus{{ ContainerID: "docker://numbers", @@ -14719,7 +14807,7 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ ContainerStatuses: []core.ContainerStatus{{ ContainerID: "docker://numbers", @@ -14736,10 +14824,9 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - "", - "Container statuses add ephemeral container", + test: "Container statuses add ephemeral container", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ ContainerStatuses: []core.ContainerStatus{{ ContainerID: "docker://numbers", @@ -14768,7 +14855,7 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ ContainerStatuses: []core.ContainerStatus{{ ContainerID: "docker://numbers", @@ -14796,10 +14883,9 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - "", - "Container statuses ephemeral container running", + test: "Container statuses ephemeral container running", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ ContainerStatuses: []core.ContainerStatus{{ ContainerID: "docker://numbers", @@ -14831,7 +14917,7 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ ContainerStatuses: []core.ContainerStatus{{ ContainerID: "docker://numbers", @@ -14860,10 +14946,9 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - "", - "Container statuses ephemeral container exited", + test: "Container statuses ephemeral container exited", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ InitContainerStatuses: []core.ContainerStatus{{ ContainerID: "docker://numbers", @@ -14911,7 
+14996,7 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ InitContainerStatuses: []core.ContainerStatus{{ ContainerID: "docker://numbers", @@ -14953,21 +15038,20 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - "", - "Container statuses all containers terminated", + test: "Container statuses all containers terminated", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ ResourceClaimStatuses: []core.PodResourceClaimStatus{ {Name: "no-such-claim", ResourceClaimName: ptr.To("my-claim")}, }, }), ), - *podtest.MakePod("foo"), - "status.resourceClaimStatuses[0].name: Invalid value: \"no-such-claim\": must match the name of an entry in `spec.resourceClaims`", - "Non-existent PodResourceClaim", + old: *podtest.MakePod("foo"), + err: "status.resourceClaimStatuses[0].name: Invalid value: \"no-such-claim\": must match the name of an entry in `spec.resourceClaims`", + test: "Non-existent PodResourceClaim", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetResourceClaims(core.PodResourceClaim{Name: "my-claim"}), podtest.SetStatus(core.PodStatus{ ResourceClaimStatuses: []core.PodResourceClaimStatus{ @@ -14975,13 +15059,13 @@ func TestValidatePodStatusUpdate(t *testing.T) { }, }), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetResourceClaims(core.PodResourceClaim{Name: "my-claim"}), ), - `status.resourceClaimStatuses[0].name: Invalid value: "%$!#": a lowercase RFC 1123 subdomain must consist of`, - "Invalid ResourceClaim name", + err: `status.resourceClaimStatuses[0].name: Invalid value: "%$!#": a lowercase RFC 1123 subdomain must consist of`, + test: "Invalid ResourceClaim name", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetResourceClaims( core.PodResourceClaim{Name: "my-claim"}, core.PodResourceClaim{Name: "my-other-claim"}, @@ -14994,13 +15078,13 @@ func TestValidatePodStatusUpdate(t *testing.T) { }, }), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetResourceClaims(core.PodResourceClaim{Name: "my-claim"}), ), - `status.resourceClaimStatuses[2].name: Duplicate value: "my-other-claim"`, - "Duplicate ResourceClaimStatuses.Name", + err: `status.resourceClaimStatuses[2].name: Duplicate value: "my-other-claim"`, + test: "Duplicate ResourceClaimStatuses.Name", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetResourceClaims( core.PodResourceClaim{Name: "my-claim"}, core.PodResourceClaim{Name: "my-other-claim"}, @@ -15012,13 +15096,12 @@ func TestValidatePodStatusUpdate(t *testing.T) { }, }), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetResourceClaims(core.PodResourceClaim{Name: "my-claim"}), ), - "", - "ResourceClaimStatuses okay", + test: "ResourceClaimStatuses okay", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetInitContainers(podtest.MakeContainer("init")), podtest.SetStatus(core.PodStatus{ InitContainerStatuses: []core.ContainerStatus{{ @@ -15048,7 +15131,7 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetInitContainers(podtest.MakeContainer("init")), podtest.SetRestartPolicy(core.RestartPolicyNever), podtest.SetStatus(core.PodStatus{ @@ -15080,10 +15163,10 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - `status.initContainerStatuses[0].state: Forbidden: may not be 
transitioned to non-terminated state`, - "init container cannot restart if RestartPolicyNever", + err: `status.initContainerStatuses[0].state: Forbidden: may not be transitioned to non-terminated state`, + test: "init container cannot restart if RestartPolicyNever", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetInitContainers(podtest.MakeContainer("restartable-init", podtest.SetContainerRestartPolicy(containerRestartPolicyAlways))), podtest.SetRestartPolicy(core.RestartPolicyNever), podtest.SetStatus(core.PodStatus{ @@ -15114,7 +15197,7 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetInitContainers(podtest.MakeContainer("restartable-init", podtest.SetContainerRestartPolicy(containerRestartPolicyAlways))), podtest.SetRestartPolicy(core.RestartPolicyNever), podtest.SetStatus(core.PodStatus{ @@ -15146,10 +15229,9 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - "", - "restartable init container can restart if RestartPolicyNever", + test: "restartable init container can restart if RestartPolicyNever", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetInitContainers(podtest.MakeContainer("restartable-init", podtest.SetContainerRestartPolicy(containerRestartPolicyAlways))), podtest.SetRestartPolicy(core.RestartPolicyOnFailure), podtest.SetStatus(core.PodStatus{ @@ -15180,7 +15262,7 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetInitContainers(podtest.MakeContainer("restartable-init", podtest.SetContainerRestartPolicy(containerRestartPolicyAlways))), podtest.SetRestartPolicy(core.RestartPolicyOnFailure), podtest.SetStatus(core.PodStatus{ @@ -15212,10 +15294,9 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - "", - "restartable init container can restart if RestartPolicyOnFailure", + test: "restartable init container can restart if RestartPolicyOnFailure", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetInitContainers(podtest.MakeContainer("restartable-init", podtest.SetContainerRestartPolicy(containerRestartPolicyAlways))), podtest.SetRestartPolicy(core.RestartPolicyAlways), podtest.SetStatus(core.PodStatus{ @@ -15246,7 +15327,7 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetInitContainers(podtest.MakeContainer("restartable-init", podtest.SetContainerRestartPolicy(containerRestartPolicyAlways))), podtest.SetRestartPolicy(core.RestartPolicyAlways), podtest.SetStatus(core.PodStatus{ @@ -15278,47 +15359,44 @@ func TestValidatePodStatusUpdate(t *testing.T) { }}, }), ), - "", - "restartable init container can restart if RestartPolicyAlways", + test: "restartable init container can restart if RestartPolicyAlways", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ QOSClass: core.PodQOSBurstable, }), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ QOSClass: core.PodQOSGuaranteed, }), ), - "tatus.qosClass: Invalid value: \"Burstable\": field is immutable", - "qosClass can not be changed", + err: "tatus.qosClass: Invalid value: \"Burstable\": field is immutable", + test: "qosClass can not be changed", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ QOSClass: core.PodQOSBurstable, }), ), - *podtest.MakePod("foo", + old: 
*podtest.MakePod("foo", podtest.SetStatus(core.PodStatus{ QOSClass: core.PodQOSBurstable, }), ), - "", - "qosClass no change", + test: "qosClass no change", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus( podtest.MakePodStatus( podtest.SetContainerStatuses(core.ContainerStatus{}), ), ), ), - *podtest.MakePod("foo"), - "", - "nil containerUser in containerStatuses", + old: *podtest.MakePod("foo"), + test: "nil containerUser in containerStatuses", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus( podtest.MakePodStatus( podtest.SetContainerStatuses(core.ContainerStatus{ @@ -15327,11 +15405,10 @@ func TestValidatePodStatusUpdate(t *testing.T) { ), ), ), - *podtest.MakePod("foo"), - "", - "empty containerUser in containerStatuses", + old: *podtest.MakePod("foo"), + test: "empty containerUser in containerStatuses", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus( podtest.MakePodStatus( podtest.SetContainerStatuses(core.ContainerStatus{ @@ -15346,11 +15423,10 @@ func TestValidatePodStatusUpdate(t *testing.T) { ), ), ), - *podtest.MakePod("foo"), - "", - "containerUser with valid ids in containerStatuses", + old: *podtest.MakePod("foo"), + test: "containerUser with valid ids in containerStatuses", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus( podtest.MakePodStatus( podtest.SetContainerStatuses(core.ContainerStatus{ @@ -15365,13 +15441,13 @@ func TestValidatePodStatusUpdate(t *testing.T) { ), ), ), - *podtest.MakePod("foo"), - `status.containerStatuses[0].user.linux.uid: Invalid value: -1: must be between 0 and 2147483647, inclusive` + + old: *podtest.MakePod("foo"), + err: `status.containerStatuses[0].user.linux.uid: Invalid value: -1: must be between 0 and 2147483647, inclusive` + `, status.containerStatuses[0].user.linux.gid: Invalid value: -1: must be between 0 and 2147483647, inclusive` + `, status.containerStatuses[0].user.linux.supplementalGroups[0]: Invalid value: -1: must be between 0 and 2147483647, inclusive`, - "containerUser with invalid uids/gids/supplementalGroups in containerStatuses", + test: "containerUser with invalid uids/gids/supplementalGroups in containerStatuses", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetOS(core.Windows), podtest.SetStatus( podtest.MakePodStatus( @@ -15383,25 +15459,23 @@ func TestValidatePodStatusUpdate(t *testing.T) { ), ), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetOS(core.Windows), ), - `status.containerStatuses[0].user.linux: Forbidden: cannot be set for a windows pod`, - "containerUser cannot be set for windows pod in containerStatuses", + err: `status.containerStatuses[0].user.linux: Forbidden: cannot be set for a windows pod`, + test: "containerUser cannot be set for windows pod in containerStatuses", }, { - - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus( podtest.MakePodStatus( podtest.SetInitContainerStatuses(core.ContainerStatus{}), ), ), ), - *podtest.MakePod("foo"), - "", - "nil containerUser in initContainerStatuses", + old: *podtest.MakePod("foo"), + test: "nil containerUser in initContainerStatuses", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus( podtest.MakePodStatus( podtest.SetInitContainerStatuses(core.ContainerStatus{ @@ -15410,11 +15484,10 @@ func TestValidatePodStatusUpdate(t *testing.T) { ), ), ), - *podtest.MakePod("foo"), - "", - "empty containerUser in initContainerStatuses", + old: 
*podtest.MakePod("foo"), + test: "empty containerUser in initContainerStatuses", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus( podtest.MakePodStatus( podtest.SetInitContainerStatuses(core.ContainerStatus{ @@ -15429,11 +15502,10 @@ func TestValidatePodStatusUpdate(t *testing.T) { ), ), ), - *podtest.MakePod("foo"), - "", - "containerUser with valid ids in initContainerStatuses", + old: *podtest.MakePod("foo"), + test: "containerUser with valid ids in initContainerStatuses", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus( podtest.MakePodStatus( podtest.SetInitContainerStatuses(core.ContainerStatus{ @@ -15448,13 +15520,13 @@ func TestValidatePodStatusUpdate(t *testing.T) { ), ), ), - *podtest.MakePod("foo"), - `status.initContainerStatuses[0].user.linux.uid: Invalid value: -1: must be between 0 and 2147483647, inclusive` + + old: *podtest.MakePod("foo"), + err: `status.initContainerStatuses[0].user.linux.uid: Invalid value: -1: must be between 0 and 2147483647, inclusive` + `, status.initContainerStatuses[0].user.linux.gid: Invalid value: -1: must be between 0 and 2147483647, inclusive` + `, status.initContainerStatuses[0].user.linux.supplementalGroups[0]: Invalid value: -1: must be between 0 and 2147483647, inclusive`, - "containerUser with invalid uids/gids/supplementalGroups in initContainerStatuses", + test: "containerUser with invalid uids/gids/supplementalGroups in initContainerStatuses", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetOS(core.Windows), podtest.SetStatus( podtest.MakePodStatus( @@ -15466,24 +15538,23 @@ func TestValidatePodStatusUpdate(t *testing.T) { ), ), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetOS(core.Windows), ), - `status.initContainerStatuses[0].user.linux: Forbidden: cannot be set for a windows pod`, - "containerUser cannot be set for windows pod in initContainerStatuses", + err: `status.initContainerStatuses[0].user.linux: Forbidden: cannot be set for a windows pod`, + test: "containerUser cannot be set for windows pod in initContainerStatuses", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus( podtest.MakePodStatus( podtest.SetEphemeralContainerStatuses(core.ContainerStatus{}), ), ), ), - *podtest.MakePod("foo"), - "", - "nil containerUser in ephemeralContainerStatuses", + old: *podtest.MakePod("foo"), + test: "nil containerUser in ephemeralContainerStatuses", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus( podtest.MakePodStatus( podtest.SetEphemeralContainerStatuses(core.ContainerStatus{ @@ -15492,11 +15563,10 @@ func TestValidatePodStatusUpdate(t *testing.T) { ), ), ), - *podtest.MakePod("foo"), - "", - "empty containerUser in ephemeralContainerStatuses", + old: *podtest.MakePod("foo"), + test: "empty containerUser in ephemeralContainerStatuses", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus( podtest.MakePodStatus( podtest.SetEphemeralContainerStatuses(core.ContainerStatus{ @@ -15511,11 +15581,10 @@ func TestValidatePodStatusUpdate(t *testing.T) { ), ), ), - *podtest.MakePod("foo"), - "", - "containerUser with valid ids in ephemeralContainerStatuses", + old: *podtest.MakePod("foo"), + test: "containerUser with valid ids in ephemeralContainerStatuses", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetStatus( podtest.MakePodStatus( podtest.SetEphemeralContainerStatuses(core.ContainerStatus{ @@ -15530,13 +15599,13 @@ 
func TestValidatePodStatusUpdate(t *testing.T) { ), ), ), - *podtest.MakePod("foo"), - `status.ephemeralContainerStatuses[0].user.linux.uid: Invalid value: -1: must be between 0 and 2147483647, inclusive` + + old: *podtest.MakePod("foo"), + err: `status.ephemeralContainerStatuses[0].user.linux.uid: Invalid value: -1: must be between 0 and 2147483647, inclusive` + `, status.ephemeralContainerStatuses[0].user.linux.gid: Invalid value: -1: must be between 0 and 2147483647, inclusive` + `, status.ephemeralContainerStatuses[0].user.linux.supplementalGroups[0]: Invalid value: -1: must be between 0 and 2147483647, inclusive`, - "containerUser with invalid uids/gids/supplementalGroups in ephemeralContainerStatuses", + test: "containerUser with invalid uids/gids/supplementalGroups in ephemeralContainerStatuses", }, { - *podtest.MakePod("foo", + new: *podtest.MakePod("foo", podtest.SetOS(core.Windows), podtest.SetStatus( podtest.MakePodStatus( @@ -15548,17 +15617,19 @@ func TestValidatePodStatusUpdate(t *testing.T) { ), ), ), - *podtest.MakePod("foo", + old: *podtest.MakePod("foo", podtest.SetOS(core.Windows), ), - `status.ephemeralContainerStatuses[0].user.linux: Forbidden: cannot be set for a windows pod`, - "containerUser cannot be set for windows pod in ephemeralContainerStatuses", + err: `status.ephemeralContainerStatuses[0].user.linux: Forbidden: cannot be set for a windows pod`, + test: "containerUser cannot be set for windows pod in ephemeralContainerStatuses", }, } for _, test := range tests { - test.new.ObjectMeta.ResourceVersion = "1" - test.old.ObjectMeta.ResourceVersion = "1" + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ClearingNominatedNodeNameAfterBinding, test.enableClearingNominatedNodeNameAfterBinding) + + test.new.ResourceVersion = "1" + test.old.ResourceVersion = "1" errs := ValidatePodStatusUpdate(&test.new, &test.old, PodValidationOptions{}) if test.err == "" { if len(errs) != 0 { diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index b9b10c72c64..8f47f18ff00 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -120,6 +120,13 @@ const ( // Enables kubelet to detect CSI volume condition and send the event of the abnormal volume to the corresponding pod that is using it. CSIVolumeHealth featuregate.Feature = "CSIVolumeHealth" + // owner: @sanposhiho @wojtek-t + // kep: https://kep.k8s.io/5278 + // + // Clear pod.Status.NominatedNodeName when pod is bound to a node. + // This prevents stale information from affecting external scheduling components. + ClearingNominatedNodeNameAfterBinding featuregate.Feature = "ClearingNominatedNodeNameAfterBinding" + // owner: @ahmedtd // // Enable ClusterTrustBundle objects and Kubelet integration. 
@@ -1139,6 +1146,10 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate
         {Version: version.MustParse("1.21"), Default: false, PreRelease: featuregate.Alpha},
     },
 
+    ClearingNominatedNodeNameAfterBinding: {
+        {Version: version.MustParse("1.34"), Default: true, PreRelease: featuregate.Beta},
+    },
+
     ClusterTrustBundle: {
         {Version: version.MustParse("1.27"), Default: false, PreRelease: featuregate.Alpha},
         {Version: version.MustParse("1.33"), Default: false, PreRelease: featuregate.Beta},
diff --git a/pkg/registry/core/pod/storage/storage.go b/pkg/registry/core/pod/storage/storage.go
index d885b8bf046..37334a9e5b5 100644
--- a/pkg/registry/core/pod/storage/storage.go
+++ b/pkg/registry/core/pod/storage/storage.go
@@ -241,6 +241,10 @@ func (r *BindingREST) setPodNodeAndMetadata(ctx context.Context, podUID types.UI
         return nil, fmt.Errorf("pod %v has non-empty .spec.schedulingGates", pod.Name)
     }
     pod.Spec.NodeName = machine
+    // Clear nomination hint to prevent stale information affecting external components.
+    if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.ClearingNominatedNodeNameAfterBinding) {
+        pod.Status.NominatedNodeName = ""
+    }
     if pod.Annotations == nil {
         pod.Annotations = make(map[string]string)
     }
@@ -256,6 +260,7 @@ func (r *BindingREST) setPodNodeAndMetadata(ctx context.Context, podUID types.UI
         Type:   api.PodScheduled,
         Status: api.ConditionTrue,
     })
+    finalPod = pod
     return pod, nil
 }), dryRun, nil)
diff --git a/pkg/registry/core/pod/storage/storage_test.go b/pkg/registry/core/pod/storage/storage_test.go
index a2a0ac6c0a1..cff8ef035e8 100644
--- a/pkg/registry/core/pod/storage/storage_test.go
+++ b/pkg/registry/core/pod/storage/storage_test.go
@@ -624,6 +624,63 @@ func TestEtcdCreate(t *testing.T) {
     }
 }
 
+func TestEtcdCreateClearsNominatedNodeName(t *testing.T) {
+    for _, featureGateEnabled := range []bool{true, false} {
+        t.Run(fmt.Sprintf("FeatureGate=%v", featureGateEnabled), func(t *testing.T) {
+            featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate,
+                kubefeatures.ClearingNominatedNodeNameAfterBinding, featureGateEnabled)
+
+            storage, bindingStorage, statusStorage, server := newStorage(t)
+            defer server.Terminate(t)
+            defer storage.DestroyFunc()
+            ctx := genericapirequest.NewDefaultContext()
+
+            pod := validNewPod()
+            created, err := storage.Create(ctx, pod, rest.ValidateAllObjectFunc, &metav1.CreateOptions{})
+            if err != nil {
+                t.Fatalf("unexpected error: %v", err)
+            }
+
+            // Set NominatedNodeName via status update
+            createdPod := created.(*api.Pod)
+            createdPod.Status.NominatedNodeName = "nominated-node"
+            _, _, err = statusStorage.Update(ctx, createdPod.Name, rest.DefaultUpdatedObjectInfo(createdPod),
+                rest.ValidateAllObjectFunc, rest.ValidateAllObjectUpdateFunc, false, &metav1.UpdateOptions{})
+            if err != nil {
+                t.Fatalf("unexpected error updating pod status: %v", err)
+            }
+
+            // Bind the pod
+            _, err = bindingStorage.Create(ctx, "foo", &api.Binding{
+                ObjectMeta: metav1.ObjectMeta{Namespace: metav1.NamespaceDefault, Name: "foo"},
+                Target:     api.ObjectReference{Name: "machine"},
+            }, rest.ValidateAllObjectFunc, &metav1.CreateOptions{})
+            if err != nil {
+                t.Fatalf("unexpected error: %v", err)
+            }
+
+            // Verify NominatedNodeName
+            obj, err := storage.Get(ctx, "foo", &metav1.GetOptions{})
+            if err != nil {
+                t.Fatalf("Unexpected error %v", err)
+            }
+            boundPod := obj.(*api.Pod)
+
+            if featureGateEnabled {
+                if boundPod.Status.NominatedNodeName != "" {
+                    t.Errorf("expected NominatedNodeName to be cleared, but got: %s",
boundPod.Status.NominatedNodeName) + } + } else { + if boundPod.Status.NominatedNodeName != "nominated-node" { + t.Errorf("expected NominatedNodeName to be preserved, but got: %s", + boundPod.Status.NominatedNodeName) + } + } + }) + } +} + // Ensure that when scheduler creates a binding for a pod that has already been deleted // by the API server, API server returns not-found error. func TestEtcdCreateBindingNoPod(t *testing.T) { diff --git a/test/compatibility_lifecycle/reference/versioned_feature_list.yaml b/test/compatibility_lifecycle/reference/versioned_feature_list.yaml index 97cc26eeb7c..0a156adda03 100644 --- a/test/compatibility_lifecycle/reference/versioned_feature_list.yaml +++ b/test/compatibility_lifecycle/reference/versioned_feature_list.yaml @@ -179,6 +179,12 @@ lockToDefault: false preRelease: Alpha version: "1.32" +- name: ClearingNominatedNodeNameAfterBinding + versionedSpecs: + - default: true + lockToDefault: false + preRelease: Beta + version: "1.34" - name: CloudControllerManagerWebhook versionedSpecs: - default: false diff --git a/test/integration/scheduler/preemption/preemption_test.go b/test/integration/scheduler/preemption/preemption_test.go index c418b14f012..f00f817890a 100644 --- a/test/integration/scheduler/preemption/preemption_test.go +++ b/test/integration/scheduler/preemption/preemption_test.go @@ -422,73 +422,76 @@ func TestPreemption(t *testing.T) { for _, asyncPreemptionEnabled := range []bool{true, false} { for _, asyncAPICallsEnabled := range []bool{true, false} { - for _, test := range tests { - t.Run(fmt.Sprintf("%s (Async preemption enabled: %v, Async API calls enabled: %v)", test.name, asyncPreemptionEnabled, asyncAPICallsEnabled), func(t *testing.T) { - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, asyncPreemptionEnabled) - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncAPICalls, asyncAPICallsEnabled) + for _, clearingNominatedNodeNameAfterBinding := range []bool{true, false} { + for _, test := range tests { + t.Run(fmt.Sprintf("%s (Async preemption enabled: %v, Async API calls enabled: %v, ClearingNominatedNodeNameAfterBinding: %v)", test.name, asyncPreemptionEnabled, asyncAPICallsEnabled, clearingNominatedNodeNameAfterBinding), func(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, asyncPreemptionEnabled) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncAPICalls, asyncAPICallsEnabled) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ClearingNominatedNodeNameAfterBinding, clearingNominatedNodeNameAfterBinding) - testCtx := testutils.InitTestSchedulerWithOptions(t, - testutils.InitTestAPIServer(t, "preemption", nil), - 0, - scheduler.WithProfiles(cfg.Profiles...), - scheduler.WithFrameworkOutOfTreeRegistry(registry)) - testutils.SyncSchedulerInformerFactory(testCtx) - go testCtx.Scheduler.Run(testCtx.Ctx) + testCtx := testutils.InitTestSchedulerWithOptions(t, + testutils.InitTestAPIServer(t, "preemption", nil), + 0, + scheduler.WithProfiles(cfg.Profiles...), + scheduler.WithFrameworkOutOfTreeRegistry(registry)) + testutils.SyncSchedulerInformerFactory(testCtx) + go testCtx.Scheduler.Run(testCtx.Ctx) - if _, err := createNode(testCtx.ClientSet, nodeObject); err != nil { - t.Fatalf("Error creating node: %v", err) - } - - cs := testCtx.ClientSet - - 
filter.Tokens = test.initTokens - filter.EnablePreFilter = test.enablePreFilter - filter.Unresolvable = test.unresolvable - pods := make([]*v1.Pod, len(test.existingPods)) - // Create and run existingPods. - for i, p := range test.existingPods { - p.Namespace = testCtx.NS.Name - pods[i], err = runPausePod(cs, p) - if err != nil { - t.Fatalf("Error running pause pod: %v", err) + if _, err := createNode(testCtx.ClientSet, nodeObject); err != nil { + t.Fatalf("Error creating node: %v", err) } - } - // Create the "pod". - test.pod.Namespace = testCtx.NS.Name - preemptor, err := createPausePod(cs, test.pod) - if err != nil { - t.Errorf("Error while creating high priority pod: %v", err) - } - // Wait for preemption of pods and make sure the other ones are not preempted. - for i, p := range pods { - if _, found := test.preemptedPodIndexes[i]; found { - if err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, wait.ForeverTestTimeout, false, - podIsGettingEvicted(cs, p.Namespace, p.Name)); err != nil { - t.Errorf("Pod %v/%v is not getting evicted.", p.Namespace, p.Name) - } - pod, err := cs.CoreV1().Pods(p.Namespace).Get(testCtx.Ctx, p.Name, metav1.GetOptions{}) + + cs := testCtx.ClientSet + + filter.Tokens = test.initTokens + filter.EnablePreFilter = test.enablePreFilter + filter.Unresolvable = test.unresolvable + pods := make([]*v1.Pod, len(test.existingPods)) + // Create and run existingPods. + for i, p := range test.existingPods { + p.Namespace = testCtx.NS.Name + pods[i], err = runPausePod(cs, p) if err != nil { - t.Errorf("Error %v when getting the updated status for pod %v/%v ", err, p.Namespace, p.Name) + t.Fatalf("Error running pause pod: %v", err) } - _, cond := podutil.GetPodCondition(&pod.Status, v1.DisruptionTarget) - if cond == nil { - t.Errorf("Pod %q does not have the expected condition: %q", klog.KObj(pod), v1.DisruptionTarget) + } + // Create the "pod". + test.pod.Namespace = testCtx.NS.Name + preemptor, err := createPausePod(cs, test.pod) + if err != nil { + t.Errorf("Error while creating high priority pod: %v", err) + } + // Wait for preemption of pods and make sure the other ones are not preempted. + for i, p := range pods { + if _, found := test.preemptedPodIndexes[i]; found { + if err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, wait.ForeverTestTimeout, false, + podIsGettingEvicted(cs, p.Namespace, p.Name)); err != nil { + t.Errorf("Pod %v/%v is not getting evicted.", p.Namespace, p.Name) + } + pod, err := cs.CoreV1().Pods(p.Namespace).Get(testCtx.Ctx, p.Name, metav1.GetOptions{}) + if err != nil { + t.Errorf("Error %v when getting the updated status for pod %v/%v ", err, p.Namespace, p.Name) + } + _, cond := podutil.GetPodCondition(&pod.Status, v1.DisruptionTarget) + if cond == nil { + t.Errorf("Pod %q does not have the expected condition: %q", klog.KObj(pod), v1.DisruptionTarget) + } + } else if p.DeletionTimestamp != nil { + t.Errorf("Didn't expect pod %v to get preempted.", p.Name) } - } else if p.DeletionTimestamp != nil { - t.Errorf("Didn't expect pod %v to get preempted.", p.Name) } - } - // Also check that the preemptor pod gets the NominatedNodeName field set. - if len(test.preemptedPodIndexes) > 0 { - if err := testutils.WaitForNominatedNodeName(testCtx.Ctx, cs, preemptor); err != nil { - t.Errorf("NominatedNodeName field was not set for pod %v: %v", preemptor.Name, err) + // Also check that the preemptor pod gets the NominatedNodeName field set. 
+ if len(test.preemptedPodIndexes) > 0 && !clearingNominatedNodeNameAfterBinding { + if err := testutils.WaitForNominatedNodeName(testCtx.Ctx, cs, preemptor); err != nil { + t.Errorf("NominatedNodeName field was not set for pod %v: %v", preemptor.Name, err) + } } - } - // Cleanup - pods = append(pods, preemptor) - testutils.CleanupPods(testCtx.Ctx, cs, t, pods) - }) + // Cleanup + pods = append(pods, preemptor) + testutils.CleanupPods(testCtx.Ctx, cs, t, pods) + }) + } } } } @@ -1337,60 +1340,68 @@ func TestPreemptionStarvation(t *testing.T) { } for _, asyncPreemptionEnabled := range []bool{true, false} { - for _, test := range tests { - t.Run(fmt.Sprintf("%s (Async preemption enabled: %v)", test.name, asyncPreemptionEnabled), func(t *testing.T) { - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, asyncPreemptionEnabled) + for _, clearingNominatedNodeNameAfterBinding := range []bool{true, false} { + for _, test := range tests { + t.Run(fmt.Sprintf("%s (Async preemption enabled: %v, ClearingNominatedNodeNameAfterBinding: %v)", test.name, asyncPreemptionEnabled, clearingNominatedNodeNameAfterBinding), func(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, asyncPreemptionEnabled) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ClearingNominatedNodeNameAfterBinding, clearingNominatedNodeNameAfterBinding) - pendingPods := make([]*v1.Pod, test.numExpectedPending) - numRunningPods := test.numExistingPod - test.numExpectedPending - runningPods := make([]*v1.Pod, numRunningPods) - // Create and run existingPods. - for i := 0; i < numRunningPods; i++ { - runningPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(testCtx, fmt.Sprintf("rpod-%v", i), mediumPriority, 0)) + pendingPods := make([]*v1.Pod, test.numExpectedPending) + numRunningPods := test.numExistingPod - test.numExpectedPending + runningPods := make([]*v1.Pod, numRunningPods) + // Create and run existingPods. + for i := 0; i < numRunningPods; i++ { + runningPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(testCtx, fmt.Sprintf("rpod-%v", i), mediumPriority, 0)) + if err != nil { + t.Fatalf("Error creating pause pod: %v", err) + } + } + // make sure that runningPods are all scheduled. + for _, p := range runningPods { + if err := testutils.WaitForPodToSchedule(testCtx.Ctx, cs, p); err != nil { + t.Fatalf("Pod %v/%v didn't get scheduled: %v", p.Namespace, p.Name, err) + } + } + // Create pending pods. + for i := 0; i < test.numExpectedPending; i++ { + pendingPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(testCtx, fmt.Sprintf("ppod-%v", i), mediumPriority, 0)) + if err != nil { + t.Fatalf("Error creating pending pod: %v", err) + } + } + // Make sure that all pending pods are being marked unschedulable. + for _, p := range pendingPods { + if err := wait.PollUntilContextTimeout(testCtx.Ctx, 100*time.Millisecond, wait.ForeverTestTimeout, false, + podUnschedulable(cs, p.Namespace, p.Name)); err != nil { + t.Errorf("Pod %v/%v didn't get marked unschedulable: %v", p.Namespace, p.Name, err) + } + } + // Create the preemptor. + preemptor, err := createPausePod(cs, test.preemptor) if err != nil { - t.Fatalf("Error creating pause pod: %v", err) + t.Errorf("Error while creating the preempting pod: %v", err) } - } - // make sure that runningPods are all scheduled. 
- for _, p := range runningPods { - if err := testutils.WaitForPodToSchedule(testCtx.Ctx, cs, p); err != nil { - t.Fatalf("Pod %v/%v didn't get scheduled: %v", p.Namespace, p.Name, err) + + // Make sure that preemptor is scheduled after preemptions. + if err := testutils.WaitForPodToScheduleWithTimeout(testCtx.Ctx, cs, preemptor, 60*time.Second); err != nil { + t.Errorf("Preemptor pod %v didn't get scheduled: %v", preemptor.Name, err) } - } - // Create pending pods. - for i := 0; i < test.numExpectedPending; i++ { - pendingPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(testCtx, fmt.Sprintf("ppod-%v", i), mediumPriority, 0)) - if err != nil { - t.Fatalf("Error creating pending pod: %v", err) + + // Check if .status.nominatedNodeName of the preemptor pod gets set when feature gate is disabled. + // This test always expects preemption to occur since numExistingPod (10) fills the node completely. + if !clearingNominatedNodeNameAfterBinding { + if err := testutils.WaitForNominatedNodeName(testCtx.Ctx, cs, preemptor); err != nil { + t.Errorf(".status.nominatedNodeName was not set for pod %v/%v: %v", preemptor.Namespace, preemptor.Name, err) + } } - } - // Make sure that all pending pods are being marked unschedulable. - for _, p := range pendingPods { - if err := wait.PollUntilContextTimeout(testCtx.Ctx, 100*time.Millisecond, wait.ForeverTestTimeout, false, - podUnschedulable(cs, p.Namespace, p.Name)); err != nil { - t.Errorf("Pod %v/%v didn't get marked unschedulable: %v", p.Namespace, p.Name, err) - } - } - // Create the preemptor. - preemptor, err := createPausePod(cs, test.preemptor) - if err != nil { - t.Errorf("Error while creating the preempting pod: %v", err) - } - // Check if .status.nominatedNodeName of the preemptor pod gets set. - if err := testutils.WaitForNominatedNodeName(testCtx.Ctx, cs, preemptor); err != nil { - t.Errorf(".status.nominatedNodeName was not set for pod %v/%v: %v", preemptor.Namespace, preemptor.Name, err) - } - // Make sure that preemptor is scheduled after preemptions. - if err := testutils.WaitForPodToScheduleWithTimeout(testCtx.Ctx, cs, preemptor, 60*time.Second); err != nil { - t.Errorf("Preemptor pod %v didn't get scheduled: %v", preemptor.Name, err) - } - // Cleanup - klog.Info("Cleaning up all pods...") - allPods := pendingPods - allPods = append(allPods, runningPods...) - allPods = append(allPods, preemptor) - testutils.CleanupPods(testCtx.Ctx, cs, t, allPods) - }) + // Cleanup + klog.Info("Cleaning up all pods...") + allPods := pendingPods + allPods = append(allPods, runningPods...) 
+ allPods = append(allPods, preemptor) + testutils.CleanupPods(testCtx.Ctx, cs, t, allPods) + }) + } } } } @@ -1441,74 +1452,80 @@ func TestPreemptionRaces(t *testing.T) { } for _, asyncPreemptionEnabled := range []bool{true, false} { - for _, test := range tests { - t.Run(fmt.Sprintf("%s (Async preemption enabled: %v)", test.name, asyncPreemptionEnabled), func(t *testing.T) { - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, asyncPreemptionEnabled) + for _, clearingNominatedNodeNameAfterBinding := range []bool{true, false} { + for _, test := range tests { + t.Run(fmt.Sprintf("%s (Async preemption enabled: %v, ClearingNominatedNodeNameAfterBinding: %v)", test.name, asyncPreemptionEnabled, clearingNominatedNodeNameAfterBinding), func(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, asyncPreemptionEnabled) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ClearingNominatedNodeNameAfterBinding, clearingNominatedNodeNameAfterBinding) - if test.numRepetitions <= 0 { - test.numRepetitions = 1 - } - for n := 0; n < test.numRepetitions; n++ { - initialPods := make([]*v1.Pod, test.numInitialPods) - additionalPods := make([]*v1.Pod, test.numAdditionalPods) - // Create and run existingPods. - for i := 0; i < test.numInitialPods; i++ { - initialPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(testCtx, fmt.Sprintf("rpod-%v", i), mediumPriority, 0)) + if test.numRepetitions <= 0 { + test.numRepetitions = 1 + } + for n := 0; n < test.numRepetitions; n++ { + initialPods := make([]*v1.Pod, test.numInitialPods) + additionalPods := make([]*v1.Pod, test.numAdditionalPods) + // Create and run existingPods. + for i := 0; i < test.numInitialPods; i++ { + initialPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(testCtx, fmt.Sprintf("rpod-%v", i), mediumPriority, 0)) + if err != nil { + t.Fatalf("Error creating pause pod: %v", err) + } + } + // make sure that initial Pods are all scheduled. + for _, p := range initialPods { + if err := testutils.WaitForPodToSchedule(testCtx.Ctx, cs, p); err != nil { + t.Fatalf("Pod %v/%v didn't get scheduled: %v", p.Namespace, p.Name, err) + } + } + // Create the preemptor. + klog.Info("Creating the preemptor pod...") + preemptor, err := createPausePod(cs, test.preemptor) if err != nil { - t.Fatalf("Error creating pause pod: %v", err) + t.Errorf("Error while creating the preempting pod: %v", err) } - } - // make sure that initial Pods are all scheduled. - for _, p := range initialPods { - if err := testutils.WaitForPodToSchedule(testCtx.Ctx, cs, p); err != nil { - t.Fatalf("Pod %v/%v didn't get scheduled: %v", p.Namespace, p.Name, err) - } - } - // Create the preemptor. 
- klog.Info("Creating the preemptor pod...") - preemptor, err := createPausePod(cs, test.preemptor) - if err != nil { - t.Errorf("Error while creating the preempting pod: %v", err) - } - klog.Info("Creating additional pods...") - for i := 0; i < test.numAdditionalPods; i++ { - additionalPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(testCtx, fmt.Sprintf("ppod-%v", i), mediumPriority, 0)) - if err != nil { - t.Fatalf("Error creating pending pod: %v", err) + klog.Info("Creating additional pods...") + for i := 0; i < test.numAdditionalPods; i++ { + additionalPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(testCtx, fmt.Sprintf("ppod-%v", i), mediumPriority, 0)) + if err != nil { + t.Fatalf("Error creating pending pod: %v", err) + } + } + // Make sure that preemptor is scheduled after preemptions. + if err := testutils.WaitForPodToScheduleWithTimeout(testCtx.Ctx, cs, preemptor, 60*time.Second); err != nil { + t.Errorf("Preemptor pod %v didn't get scheduled: %v", preemptor.Name, err) } - } - // Check that the preemptor pod gets nominated node name. - if err := testutils.WaitForNominatedNodeName(testCtx.Ctx, cs, preemptor); err != nil { - t.Errorf(".status.nominatedNodeName was not set for pod %v/%v: %v", preemptor.Namespace, preemptor.Name, err) - } - // Make sure that preemptor is scheduled after preemptions. - if err := testutils.WaitForPodToScheduleWithTimeout(testCtx.Ctx, cs, preemptor, 60*time.Second); err != nil { - t.Errorf("Preemptor pod %v didn't get scheduled: %v", preemptor.Name, err) - } - klog.Info("Check unschedulable pods still exists and were never scheduled...") - for _, p := range additionalPods { - pod, err := cs.CoreV1().Pods(p.Namespace).Get(testCtx.Ctx, p.Name, metav1.GetOptions{}) - if err != nil { - t.Errorf("Error in getting Pod %v/%v info: %v", p.Namespace, p.Name, err) + // Check that the preemptor pod gets nominated node name when feature gate is disabled. + if !clearingNominatedNodeNameAfterBinding { + if err := testutils.WaitForNominatedNodeName(testCtx.Ctx, cs, preemptor); err != nil { + t.Errorf(".status.nominatedNodeName was not set for pod %v/%v: %v", preemptor.Namespace, preemptor.Name, err) + } } - if len(pod.Spec.NodeName) > 0 { - t.Errorf("Pod %v/%v is already scheduled", p.Namespace, p.Name) - } - _, cond := podutil.GetPodCondition(&pod.Status, v1.PodScheduled) - if cond != nil && cond.Status != v1.ConditionFalse { - t.Errorf("Pod %v/%v is no longer unschedulable: %v", p.Namespace, p.Name, err) + + klog.Info("Check unschedulable pods still exists and were never scheduled...") + for _, p := range additionalPods { + pod, err := cs.CoreV1().Pods(p.Namespace).Get(testCtx.Ctx, p.Name, metav1.GetOptions{}) + if err != nil { + t.Errorf("Error in getting Pod %v/%v info: %v", p.Namespace, p.Name, err) + } + if len(pod.Spec.NodeName) > 0 { + t.Errorf("Pod %v/%v is already scheduled", p.Namespace, p.Name) + } + _, cond := podutil.GetPodCondition(&pod.Status, v1.PodScheduled) + if cond != nil && cond.Status != v1.ConditionFalse { + t.Errorf("Pod %v/%v is no longer unschedulable: %v", p.Namespace, p.Name, err) + } } + // Cleanup + klog.Info("Cleaning up all pods...") + allPods := additionalPods + allPods = append(allPods, initialPods...) + allPods = append(allPods, preemptor) + testutils.CleanupPods(testCtx.Ctx, cs, t, allPods) } - // Cleanup - klog.Info("Cleaning up all pods...") - allPods := additionalPods - allPods = append(allPods, initialPods...) 
- allPods = append(allPods, preemptor) - testutils.CleanupPods(testCtx.Ctx, cs, t, allPods) - } - }) + }) + } } } } @@ -1977,83 +1994,86 @@ func TestPDBInPreemption(t *testing.T) { } for _, asyncPreemptionEnabled := range []bool{true, false} { - for _, test := range tests { - t.Run(fmt.Sprintf("%s (Async preemption enabled: %v)", test.name, asyncPreemptionEnabled), func(t *testing.T) { - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, asyncPreemptionEnabled) + for _, clearingNominatedNodeNameAfterBinding := range []bool{true, false} { + for _, test := range tests { + t.Run(fmt.Sprintf("%s (Async preemption enabled: %v, ClearingNominatedNodeNameAfterBinding: %v)", test.name, asyncPreemptionEnabled, clearingNominatedNodeNameAfterBinding), func(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, asyncPreemptionEnabled) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ClearingNominatedNodeNameAfterBinding, clearingNominatedNodeNameAfterBinding) - for i := 1; i <= test.nodeCnt; i++ { - nodeName := fmt.Sprintf("node-%v", i) - _, err := createNode(cs, st.MakeNode().Name(nodeName).Capacity(defaultNodeRes).Obj()) - if err != nil { - t.Fatalf("Error creating node %v: %v", nodeName, err) - } - } - - pods := make([]*v1.Pod, len(test.existingPods)) - var err error - // Create and run existingPods. - for i, p := range test.existingPods { - if pods[i], err = runPausePod(cs, p); err != nil { - t.Fatalf("Test [%v]: Error running pause pod: %v", test.name, err) - } - // Add pod condition ready so that PDB is updated. - addPodConditionReady(p) - if _, err := testCtx.ClientSet.CoreV1().Pods(testCtx.NS.Name).UpdateStatus(testCtx.Ctx, p, metav1.UpdateOptions{}); err != nil { - t.Fatal(err) - } - } - // Wait for Pods to be stable in scheduler cache. - if err := waitCachedPodsStable(testCtx, test.existingPods); err != nil { - t.Fatalf("Not all pods are stable in the cache: %v", err) - } - - // Create PDBs. - for _, pdb := range test.pdbs { - _, err := testCtx.ClientSet.PolicyV1().PodDisruptionBudgets(testCtx.NS.Name).Create(testCtx.Ctx, pdb, metav1.CreateOptions{}) - if err != nil { - t.Fatalf("Failed to create PDB: %v", err) - } - } - // Wait for PDBs to become stable. - if err := waitForPDBsStable(testCtx, test.pdbs, test.pdbPodNum); err != nil { - t.Fatalf("Not all pdbs are stable in the cache: %v", err) - } - - // Create the "pod". - preemptor, err := createPausePod(cs, test.pod) - if err != nil { - t.Errorf("Error while creating high priority pod: %v", err) - } - // Wait for preemption of pods and make sure the other ones are not preempted. 
- for i, p := range pods { - if _, found := test.preemptedPodIndexes[i]; found { - if err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, wait.ForeverTestTimeout, false, - podIsGettingEvicted(cs, p.Namespace, p.Name)); err != nil { - t.Errorf("Test [%v]: Pod %v/%v is not getting evicted.", test.name, p.Namespace, p.Name) - } - } else { - if p.DeletionTimestamp != nil { - t.Errorf("Test [%v]: Didn't expect pod %v/%v to get preempted.", test.name, p.Namespace, p.Name) + for i := 1; i <= test.nodeCnt; i++ { + nodeName := fmt.Sprintf("node-%v", i) + _, err := createNode(cs, st.MakeNode().Name(nodeName).Capacity(defaultNodeRes).Obj()) + if err != nil { + t.Fatalf("Error creating node %v: %v", nodeName, err) } } - } - // Also check if .status.nominatedNodeName of the preemptor pod gets set. - if len(test.preemptedPodIndexes) > 0 { - if err := testutils.WaitForNominatedNodeName(testCtx.Ctx, cs, preemptor); err != nil { - t.Errorf("Test [%v]: .status.nominatedNodeName was not set for pod %v/%v: %v", test.name, preemptor.Namespace, preemptor.Name, err) - } - } - // Cleanup - pods = append(pods, preemptor) - testutils.CleanupPods(testCtx.Ctx, cs, t, pods) - if err := cs.PolicyV1().PodDisruptionBudgets(testCtx.NS.Name).DeleteCollection(testCtx.Ctx, metav1.DeleteOptions{}, metav1.ListOptions{}); err != nil { - t.Errorf("error while deleting PDBs, error: %v", err) - } - if err := cs.CoreV1().Nodes().DeleteCollection(testCtx.Ctx, metav1.DeleteOptions{}, metav1.ListOptions{}); err != nil { - t.Errorf("error whiling deleting nodes, error: %v", err) - } - }) + pods := make([]*v1.Pod, len(test.existingPods)) + var err error + // Create and run existingPods. + for i, p := range test.existingPods { + if pods[i], err = runPausePod(cs, p); err != nil { + t.Fatalf("Test [%v]: Error running pause pod: %v", test.name, err) + } + // Add pod condition ready so that PDB is updated. + addPodConditionReady(p) + if _, err := testCtx.ClientSet.CoreV1().Pods(testCtx.NS.Name).UpdateStatus(testCtx.Ctx, p, metav1.UpdateOptions{}); err != nil { + t.Fatal(err) + } + } + // Wait for Pods to be stable in scheduler cache. + if err := waitCachedPodsStable(testCtx, test.existingPods); err != nil { + t.Fatalf("Not all pods are stable in the cache: %v", err) + } + + // Create PDBs. + for _, pdb := range test.pdbs { + _, err := testCtx.ClientSet.PolicyV1().PodDisruptionBudgets(testCtx.NS.Name).Create(testCtx.Ctx, pdb, metav1.CreateOptions{}) + if err != nil { + t.Fatalf("Failed to create PDB: %v", err) + } + } + // Wait for PDBs to become stable. + if err := waitForPDBsStable(testCtx, test.pdbs, test.pdbPodNum); err != nil { + t.Fatalf("Not all pdbs are stable in the cache: %v", err) + } + + // Create the "pod". + preemptor, err := createPausePod(cs, test.pod) + if err != nil { + t.Errorf("Error while creating high priority pod: %v", err) + } + // Wait for preemption of pods and make sure the other ones are not preempted. + for i, p := range pods { + if _, found := test.preemptedPodIndexes[i]; found { + if err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, wait.ForeverTestTimeout, false, + podIsGettingEvicted(cs, p.Namespace, p.Name)); err != nil { + t.Errorf("Test [%v]: Pod %v/%v is not getting evicted.", test.name, p.Namespace, p.Name) + } + } else { + if p.DeletionTimestamp != nil { + t.Errorf("Test [%v]: Didn't expect pod %v/%v to get preempted.", test.name, p.Namespace, p.Name) + } + } + } + // Also check if .status.nominatedNodeName of the preemptor pod gets set. 
+ if len(test.preemptedPodIndexes) > 0 && !clearingNominatedNodeNameAfterBinding { + if err := testutils.WaitForNominatedNodeName(testCtx.Ctx, cs, preemptor); err != nil { + t.Errorf("Test [%v]: .status.nominatedNodeName was not set for pod %v/%v: %v", test.name, preemptor.Namespace, preemptor.Name, err) + } + } + + // Cleanup + pods = append(pods, preemptor) + testutils.CleanupPods(testCtx.Ctx, cs, t, pods) + if err := cs.PolicyV1().PodDisruptionBudgets(testCtx.NS.Name).DeleteCollection(testCtx.Ctx, metav1.DeleteOptions{}, metav1.ListOptions{}); err != nil { + t.Errorf("error while deleting PDBs, error: %v", err) + } + if err := cs.CoreV1().Nodes().DeleteCollection(testCtx.Ctx, metav1.DeleteOptions{}, metav1.ListOptions{}); err != nil { + t.Errorf("error whiling deleting nodes, error: %v", err) + } + }) + } } } } @@ -2492,55 +2512,58 @@ func TestReadWriteOncePodPreemption(t *testing.T) { } for _, asyncPreemptionEnabled := range []bool{true, false} { - for _, test := range tests { - t.Run(fmt.Sprintf("%s (Async preemption enabled: %v)", test.name, asyncPreemptionEnabled), func(t *testing.T) { - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, asyncPreemptionEnabled) + for _, clearingNominatedNodeNameAfterBinding := range []bool{true, false} { + for _, test := range tests { + t.Run(fmt.Sprintf("%s (Async preemption enabled: %v, ClearingNominatedNodeNameAfterBinding: %v)", test.name, asyncPreemptionEnabled, clearingNominatedNodeNameAfterBinding), func(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerAsyncPreemption, asyncPreemptionEnabled) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ClearingNominatedNodeNameAfterBinding, clearingNominatedNodeNameAfterBinding) - if err := test.init(); err != nil { - t.Fatalf("Error while initializing test: %v", err) - } + if err := test.init(); err != nil { + t.Fatalf("Error while initializing test: %v", err) + } - pods := make([]*v1.Pod, len(test.existingPods)) - t.Cleanup(func() { - testutils.CleanupPods(testCtx.Ctx, cs, t, pods) - if err := test.cleanup(); err != nil { - t.Errorf("Error cleaning up test: %v", err) + pods := make([]*v1.Pod, len(test.existingPods)) + t.Cleanup(func() { + testutils.CleanupPods(testCtx.Ctx, cs, t, pods) + if err := test.cleanup(); err != nil { + t.Errorf("Error cleaning up test: %v", err) + } + }) + // Create and run existingPods. + for i, p := range test.existingPods { + var err error + pods[i], err = runPausePod(cs, p) + if err != nil { + t.Fatalf("Error running pause pod: %v", err) + } + } + // Create the "pod". + preemptor, err := createPausePod(cs, test.pod) + if err != nil { + t.Errorf("Error while creating high priority pod: %v", err) + } + pods = append(pods, preemptor) + // Wait for preemption of pods and make sure the other ones are not preempted. + for i, p := range pods { + if _, found := test.preemptedPodIndexes[i]; found { + if err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, wait.ForeverTestTimeout, false, + podIsGettingEvicted(cs, p.Namespace, p.Name)); err != nil { + t.Errorf("Pod %v/%v is not getting evicted.", p.Namespace, p.Name) + } + } else { + if p.DeletionTimestamp != nil { + t.Errorf("Didn't expect pod %v to get preempted.", p.Name) + } + } + } + // Also check that the preemptor pod gets the NominatedNodeName field set. 
+ if len(test.preemptedPodIndexes) > 0 && !clearingNominatedNodeNameAfterBinding { + if err := testutils.WaitForNominatedNodeName(testCtx.Ctx, cs, preemptor); err != nil { + t.Errorf("NominatedNodeName field was not set for pod %v: %v", preemptor.Name, err) + } } }) - // Create and run existingPods. - for i, p := range test.existingPods { - var err error - pods[i], err = runPausePod(cs, p) - if err != nil { - t.Fatalf("Error running pause pod: %v", err) - } - } - // Create the "pod". - preemptor, err := createPausePod(cs, test.pod) - if err != nil { - t.Errorf("Error while creating high priority pod: %v", err) - } - pods = append(pods, preemptor) - // Wait for preemption of pods and make sure the other ones are not preempted. - for i, p := range pods { - if _, found := test.preemptedPodIndexes[i]; found { - if err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, wait.ForeverTestTimeout, false, - podIsGettingEvicted(cs, p.Namespace, p.Name)); err != nil { - t.Errorf("Pod %v/%v is not getting evicted.", p.Namespace, p.Name) - } - } else { - if p.DeletionTimestamp != nil { - t.Errorf("Didn't expect pod %v to get preempted.", p.Name) - } - } - } - // Also check that the preemptor pod gets the NominatedNodeName field set. - if len(test.preemptedPodIndexes) > 0 { - if err := testutils.WaitForNominatedNodeName(testCtx.Ctx, cs, preemptor); err != nil { - t.Errorf("NominatedNodeName field was not set for pod %v: %v", preemptor.Name, err) - } - } - }) + } } } }
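
The net effect of the validation.go hunk above, restated as a minimal standalone sketch (not part of the patch): once a pod is bound (spec.nodeName set), a status update may clear status.nominatedNodeName or leave it unchanged, but may not set or change it. The helper below is hypothetical, uses plain strings in place of core.Pod and field.ErrorList, and assumes the ClearingNominatedNodeNameAfterBinding gate is enabled.

```go
package main

import "fmt"

// nominatedNodeNameUpdateAllowed mirrors the gist of the new check in
// ValidatePodStatusUpdate (with the ClearingNominatedNodeNameAfterBinding
// gate enabled): once a pod is bound (spec.nodeName non-empty), the update
// may only clear status.nominatedNodeName or keep its existing value.
func nominatedNodeNameUpdateAllowed(boundNodeName, oldNNN, newNNN string) bool {
	if boundNodeName == "" {
		return true // unbound pods may still be nominated
	}
	if newNNN == "" {
		return true // clearing the hint is always allowed
	}
	return newNNN == oldNNN // keeping the same value is allowed; setting or changing it is not
}

func main() {
	cases := []struct {
		name              string
		bound, oldN, newN string
	}{
		{"set on unbound pod", "", "", "node1"},
		{"clear on bound pod", "node1", "node2", ""},
		{"keep same on bound pod", "node1", "node2", "node2"},
		{"set on bound pod", "node1", "", "node2"},
		{"change on bound pod", "node1", "node2", "node3"},
	}
	for _, c := range cases {
		fmt.Printf("%-25s allowed=%v\n", c.name, nominatedNodeNameUpdateAllowed(c.bound, c.oldN, c.newN))
	}
}
```

The five cases mirror the transitions exercised by the new TestValidatePodStatusUpdate entries; only the last two are rejected with "may not be set on pods that are already bound to a node".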
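
Similarly, a hedged sketch of what the BindingREST change in storage.go does to the object at bind time, using the published k8s.io/api/core/v1 types rather than the internal api package; the feature-gate lookup and the finalPod bookkeeping are reduced to a boolean parameter here.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// bindPod approximates the effect of BindingREST.setPodNodeAndMetadata after this
// patch: the pod is bound to the target node and, when the gate is on (modelled
// by clearNNN), the now-stale status.nominatedNodeName hint is dropped in the
// same update.
func bindPod(pod *v1.Pod, node string, clearNNN bool) {
	pod.Spec.NodeName = node
	if clearNNN {
		pod.Status.NominatedNodeName = ""
	}
}

func main() {
	for _, gateEnabled := range []bool{true, false} {
		pod := &v1.Pod{Status: v1.PodStatus{NominatedNodeName: "node-a"}}
		bindPod(pod, "node-b", gateEnabled)
		fmt.Printf("gate=%v nodeName=%q nominatedNodeName=%q\n",
			gateEnabled, pod.Spec.NodeName, pod.Status.NominatedNodeName)
	}
}
```

With the gate enabled (its 1.34 Beta default), external scheduling components should expect status.nominatedNodeName to be empty on bound pods, which is why the preemption integration tests above only wait for WaitForNominatedNodeName when the gate is disabled.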