e2e/storage: speed up kubelet commands

Speed up stopping by not waiting for the Node to become NotReady; `systemctl` ensures
the kubelet process has stopped before returning. This should save about 40s per case.
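
A rough sketch of what the stop path can look like now; stopKubelet and the run
callback are placeholder names for illustration, not the actual e2e/storage helpers:

// Sketch only: run stands in for whatever SSH helper the suite uses to
// execute a shell command on the node; it is not part of this change.
func stopKubelet(run func(cmd string) error) error {
	// "systemctl stop" returns only after the kubelet process has exited,
	// so there is no need to also wait for the Node to become NotReady.
	return run("sudo systemctl stop kubelet")
}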

Since the stop command no longer waits for NotReady, the start command needs to wait
for the next heartbeat to ensure we are checking status posted by the new process.

Implement restart as stop followed by start, so we can capture the heartbeat time
while the kubelet is down. We also no longer need to sleep 30s here; the sleep is
moved to the callers, which still need it to ensure the volume does not disappear.
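
Under these assumptions, the start and restart paths could be composed roughly as
follows. WaitForNodeHeartbeatAfter is the helper added in this change;
GetNodeHeartbeatTime is referenced by it but its exact signature here is assumed,
and the package name, run callback, and startKubelet/restartKubelet names are
placeholders for illustration:

package sketch

import (
	"context"
	"fmt"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clientset "k8s.io/client-go/kubernetes"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
)

// startKubelet starts the kubelet and waits for a heartbeat newer than
// "after". "after" must have been read while the kubelet was down, so a
// newer heartbeat can only have been posted by the new kubelet process.
func startKubelet(ctx context.Context, c clientset.Interface, nodeName string, after metav1.Time, run func(cmd string) error) error {
	if err := run("sudo systemctl start kubelet"); err != nil {
		return err
	}
	if !e2enode.WaitForNodeHeartbeatAfter(ctx, c, nodeName, after, 5*time.Minute) {
		return fmt.Errorf("node %s did not post a heartbeat from the restarted kubelet", nodeName)
	}
	return nil
}

// restartKubelet is stop followed by start. The heartbeat time is captured
// in between, while the kubelet is guaranteed to be down, so no fixed 30s
// sleep is needed here; callers that rely on the volume staying attached
// add their own sleep instead.
func restartKubelet(ctx context.Context, c clientset.Interface, nodeName string, run func(cmd string) error) error {
	if err := run("sudo systemctl stop kubelet"); err != nil {
		return err
	}
	node, err := c.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
	if err != nil {
		return err
	}
	after := e2enode.GetNodeHeartbeatTime(node) // assumed to return metav1.Time
	return startKubelet(ctx, c, nodeName, after, run)
}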

Dropped support for non-systemd systems.
胡玮文
2024-03-22 16:57:06 +08:00
parent 95a6f2e4dc
commit f3f44f70bf
4 changed files with 73 additions and 67 deletions


@@ -160,6 +160,27 @@ func WaitForNodeSchedulable(ctx context.Context, c clientset.Interface, name str
	return false
}

// WaitForNodeHeartbeatAfter waits up to timeout for node to send the next
// heartbeat after the given timestamp.
//
// To ensure the node status is posted by a restarted kubelet process,
// after should be retrieved by [GetNodeHeartbeatTime] while the kubelet is down.
func WaitForNodeHeartbeatAfter(ctx context.Context, c clientset.Interface, name string, after metav1.Time, timeout time.Duration) bool {
	framework.Logf("Waiting up to %v for node %s to send a heartbeat after %v", timeout, name, after)
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(poll) {
		node, err := c.CoreV1().Nodes().Get(ctx, name, metav1.GetOptions{})
		if err != nil {
			framework.Logf("Couldn't get node %s", name)
			continue
		}
		if GetNodeHeartbeatTime(node).After(after.Time) {
			return true
		}
	}
	framework.Logf("Node %s didn't send a heartbeat after %v within %v", name, after, timeout)
	return false
}

// CheckReady waits up to timeout for the cluster to have the desired size and
// no not-ready nodes in it. By cluster size we mean the number of schedulable Nodes.
func CheckReady(ctx context.Context, c clientset.Interface, size int, timeout time.Duration) ([]v1.Node, error) {
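
GetNodeHeartbeatTime is referenced by the new helper but is not part of this hunk.
A minimal sketch of its assumed behaviour, reading the LastHeartbeatTime of the
NodeReady condition, would be:

// Sketch of the assumed behaviour of GetNodeHeartbeatTime: the kubelet
// refreshes LastHeartbeatTime on the NodeReady condition whenever it posts
// a node status, so that timestamp acts as the heartbeat being waited on.
func GetNodeHeartbeatTime(node *v1.Node) metav1.Time {
	for _, condition := range node.Status.Conditions {
		if condition.Type == v1.NodeReady {
			return condition.LastHeartbeatTime
		}
	}
	return metav1.Time{}
}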