Allow swapping NotReady and Unreachable Taints

gmarek
2017-04-04 15:35:44 +02:00
parent 46d4c621a8
commit 576ad815c8
5 changed files with 377 additions and 109 deletions
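The change teaches the node controller to replace one NoExecute condition taint with the other when a Node's Ready condition flips between Unknown and False, instead of leaving the stale taint in place. For reference, a minimal sketch of what the two taint templates referenced below (UnreachableTaintTemplate, NotReadyTaintTemplate) presumably look like; only the identifiers appear in this diff, so the package name, keys, and effect here are assumptions, not part of this commit:

package node // hypothetical package name for this sketch

import "k8s.io/kubernetes/pkg/api/v1"

// Assumed shape of the two taint templates the new test swaps between.
// Only the identifiers appear in this diff; the keys and effect are guesses.
var (
    UnreachableTaintTemplate = &v1.Taint{
        Key:    "node.alpha.kubernetes.io/unreachable", // assumed key
        Effect: v1.TaintEffectNoExecute,
    }
    NotReadyTaintTemplate = &v1.Taint{
        Key:    "node.alpha.kubernetes.io/notReady", // assumed key
        Effect: v1.TaintEffectNoExecute,
    }
)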

@@ -74,7 +74,9 @@ func NewNodeControllerFromClient(
clusterCIDR *net.IPNet,
serviceCIDR *net.IPNet,
nodeCIDRMaskSize int,
allocateNodeCIDRs bool) (*nodeController, error) {
allocateNodeCIDRs bool,
useTaints bool,
) (*nodeController, error) {
factory := informers.NewSharedInformerFactory(kubeClient, controller.NoResyncPeriodFunc())
@@ -99,8 +101,8 @@ func NewNodeControllerFromClient(
serviceCIDR,
nodeCIDRMaskSize,
allocateNodeCIDRs,
false,
false,
useTaints,
useTaints,
)
if err != nil {
return nil, err
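Existing call sites in the test hunks below simply pass false for the new flag to preserve the old behavior. A hedged sketch of a call with taints enabled, mirroring the constructor call in TestSwapUnreachableNotReadyTaints further down; the argument order follows the signature hunk above and the test call sites, and the parameter-name comments are taken from that signature:

// Call-site sketch with the new flag enabled (fragment from a hypothetical test).
nodeController, _ := NewNodeControllerFromClient(nil, fakeNodeHandler,
    evictionTimeout, testRateLimiterQPS, testRateLimiterQPS,
    testLargeClusterThreshold, testUnhealtyThreshold, testNodeMonitorGracePeriod,
    testNodeStartupGracePeriod, testNodeMonitorPeriod,
    nil,   // clusterCIDR
    nil,   // serviceCIDR
    0,     // nodeCIDRMaskSize
    false, // allocateNodeCIDRs
    true,  // useTaints: enable the taint-based handling the new test exercises
)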
@@ -549,7 +551,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
for _, item := range table {
nodeController, _ := NewNodeControllerFromClient(nil, item.fakeNodeHandler,
evictionTimeout, testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false)
testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false, false)
nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder()
for _, ds := range item.daemonSets {
@@ -698,7 +700,7 @@ func TestPodStatusChange(t *testing.T) {
for _, item := range table {
nodeController, _ := NewNodeControllerFromClient(nil, item.fakeNodeHandler,
evictionTimeout, testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false)
testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false, false)
nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder()
if err := syncNodeStore(nodeController, item.fakeNodeHandler); err != nil {
@@ -1215,7 +1217,7 @@ func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) {
}
nodeController, _ := NewNodeControllerFromClient(nil, fakeNodeHandler,
evictionTimeout, testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false)
testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false, false)
nodeController.now = func() metav1.Time { return fakeNow }
nodeController.enterPartialDisruptionFunc = func(nodeNum int) float32 {
return testRateLimiterQPS
@@ -1310,7 +1312,7 @@ func TestCloudProviderNoRateLimit(t *testing.T) {
nodeController, _ := NewNodeControllerFromClient(nil, fnh, 10*time.Minute,
testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold,
testNodeMonitorGracePeriod, testNodeStartupGracePeriod,
testNodeMonitorPeriod, nil, nil, 0, false)
testNodeMonitorPeriod, nil, nil, 0, false, false)
nodeController.cloud = &fakecloud.FakeCloud{}
nodeController.now = func() metav1.Time { return metav1.Date(2016, 1, 1, 12, 0, 0, 0, time.UTC) }
nodeController.recorder = testutil.NewFakeRecorder()
@@ -1579,7 +1581,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
for i, item := range table {
nodeController, _ := NewNodeControllerFromClient(nil, item.fakeNodeHandler, 5*time.Minute,
testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold,
testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false)
testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false, false)
nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder()
if err := syncNodeStore(nodeController, item.fakeNodeHandler); err != nil {
@@ -1813,7 +1815,7 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
for i, item := range table {
nodeController, _ := NewNodeControllerFromClient(nil, item.fakeNodeHandler, 5*time.Minute,
testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold,
testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false)
testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false, false)
nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder()
if err := syncNodeStore(nodeController, item.fakeNodeHandler); err != nil {
@@ -1845,6 +1847,146 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
}
}
func TestSwapUnreachableNotReadyTaints(t *testing.T) {
fakeNow := metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC)
evictionTimeout := 10 * time.Minute
fakeNodeHandler := &testutil.FakeNodeHandler{
Existing: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
metav1.LabelZoneRegion: "region1",
metav1.LabelZoneFailureDomain: "zone1",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
// Because of the logic that prevents the NodeController from evicting anything when all
// Nodes are NotReady, we need a second, healthy Node in this test. Because of how the
// tests are written, we also need to update that Node's status during the test.
{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
metav1.LabelZoneRegion: "region1",
metav1.LabelZoneFailureDomain: "zone1",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
LastHeartbeatTime: metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}),
}
timeToPass := evictionTimeout
newNodeStatus := v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
// Node status has just been updated (recent heartbeat), but the Node has been NotReady for 10 minutes.
LastHeartbeatTime: metav1.Date(2017, 1, 1, 12, 9, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
}
healthyNodeNewStatus := v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
LastHeartbeatTime: metav1.Date(2017, 1, 1, 12, 10, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
}
originalTaint := UnreachableTaintTemplate
updatedTaint := NotReadyTaintTemplate
nodeController, _ := NewNodeControllerFromClient(nil, fakeNodeHandler,
evictionTimeout, testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false, true)
nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder()
if err := syncNodeStore(nodeController, fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)
}
nodeController.doTaintingPass()
node0, err := fakeNodeHandler.Get("node0", metav1.GetOptions{})
if err != nil {
t.Errorf("Can't get current node0...")
return
}
node1, err := fakeNodeHandler.Get("node1", metav1.GetOptions{})
if err != nil {
t.Errorf("Can't get current node1...")
return
}
if originalTaint != nil && !v1.TaintExists(node0.Spec.Taints, originalTaint) {
t.Errorf("Can't find taint %v in %v", originalTaint, node0.Spec.Taints)
}
nodeController.now = func() metav1.Time { return metav1.Time{Time: fakeNow.Add(timeToPass)} }
node0.Status = newNodeStatus
node1.Status = healthyNodeNewStatus
_, err = fakeNodeHandler.UpdateStatus(node0)
if err != nil {
t.Errorf(err.Error())
return
}
_, err = fakeNodeHandler.UpdateStatus(node1)
if err != nil {
t.Errorf(err.Error())
return
}
if err := syncNodeStore(nodeController, fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)
}
nodeController.doTaintingPass()
node0, err = fakeNodeHandler.Get("node0", metav1.GetOptions{})
if err != nil {
t.Errorf("Can't get current node0...")
return
}
if updatedTaint != nil {
if !v1.TaintExists(node0.Spec.Taints, updatedTaint) {
t.Errorf("Can't find taint %v in %v", updatedTaint, node0.Spec.Taints)
}
}
}
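The test drives two tainting passes: in the first, node0's Ready condition is Unknown, so the pass is expected to apply the unreachable taint; after the status update the condition is False, and the second pass is expected to replace that taint with the not-ready one. The swap itself lives in the controller code, which is not shown in this view. A rough sketch of the behavior the test asserts, reusing the template sketch above and the v1.TaintExists helper that appears in the test; the function names and structure here are illustrative, not the controller's actual implementation:

// Illustrative only, not the controller's code: pick the condition taint a Node
// should carry for its current Ready status, and swap out the stale counterpart.
func desiredConditionTaint(ready v1.ConditionStatus) *v1.Taint {
    if ready == v1.ConditionUnknown {
        return UnreachableTaintTemplate
    }
    return NotReadyTaintTemplate // ready == v1.ConditionFalse
}

func swapConditionTaints(node *v1.Node, ready v1.ConditionStatus) {
    want := desiredConditionTaint(ready)
    var kept []v1.Taint
    for _, taint := range node.Spec.Taints {
        // Drop both condition taints; the desired one is re-added below.
        if taint.Key == UnreachableTaintTemplate.Key || taint.Key == NotReadyTaintTemplate.Key {
            continue
        }
        kept = append(kept, taint)
    }
    if !v1.TaintExists(kept, want) {
        kept = append(kept, *want)
    }
    node.Spec.Taints = kept
}

The test only asserts which of the two taints ends up on node0; details such as preserving TimeAdded or batching the node update belong to the real controller and are not modeled here.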
func TestNodeEventGeneration(t *testing.T) {
fakeNow := metav1.Date(2016, 9, 10, 12, 0, 0, 0, time.UTC)
fakeNodeHandler := &testutil.FakeNodeHandler{
@@ -1876,7 +2018,7 @@ func TestNodeEventGeneration(t *testing.T) {
nodeController, _ := NewNodeControllerFromClient(nil, fakeNodeHandler, 5*time.Minute,
testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold,
testNodeMonitorGracePeriod, testNodeStartupGracePeriod,
testNodeMonitorPeriod, nil, nil, 0, false)
testNodeMonitorPeriod, nil, nil, 0, false, false)
nodeController.cloud = &fakecloud.FakeCloud{}
nodeController.nodeExistsInCloudProvider = func(nodeName types.NodeName) (bool, error) {
return false, nil
@@ -1987,7 +2129,7 @@ func TestCheckPod(t *testing.T) {
},
}
nc, _ := NewNodeControllerFromClient(nil, fake.NewSimpleClientset(), 0, 0, 0, 0, 0, 0, 0, 0, nil, nil, 0, false)
nc, _ := NewNodeControllerFromClient(nil, fake.NewSimpleClientset(), 0, 0, 0, 0, 0, 0, 0, 0, nil, nil, 0, false, false)
nc.nodeInformer.Informer().GetStore().Add(&v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "new",