Merge pull request #20718 from wojtek-t/timeouts_in_load_test

Auto commit by PR queue bot

@@ -56,8 +56,6 @@ var _ = Describe("Load capacity", func() {
 	// Gathers metrics before teardown
 	// TODO add flag that allows to skip cleanup on failure
 	AfterEach(func() {
-		deleteAllRC(configs)
-
 		// Verify latency metrics
 		highLatencyRequests, err := HighLatencyRequests(c)
 		expectNoError(err, "Too many instances metrics above the threshold")
@@ -70,7 +68,14 @@ var _ = Describe("Load capacity", func() {
 	framework.NamespaceDeletionTimeout = time.Hour
 
 	BeforeEach(func() {
-		c = framework.Client
+		// Explicitly create a client with higher QPS limits.
+		config, err := loadConfig()
+		Expect(err).NotTo(HaveOccurred())
+		config.QPS = 50
+		config.Burst = 100
+		c, err = loadClientFromConfig(config)
+		Expect(err).NotTo(HaveOccurred())
+
 		ns = framework.Namespace.Name
 		nodes := ListSchedulableNodesOrDie(c)
 		nodeCount = len(nodes.Items)
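
The BeforeEach change above swaps the shared framework client for one built from a fresh config with QPS raised to 50 and Burst to 100; without that, the load test would be throttled by its own client-side rate limiter long before it stressed the apiserver. For illustration only, here is a small standalone Go program showing the token-bucket semantics that QPS and Burst describe, using golang.org/x/time/rate rather than the Kubernetes client's own limiter:

package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/time/rate"
)

func main() {
	// 50 requests/second sustained, bursts of up to 100 allowed
	// (the same numbers the test sets on its client config).
	limiter := rate.NewLimiter(rate.Limit(50), 100)

	start := time.Now()
	for i := 0; i < 200; i++ {
		// Wait blocks until the token bucket allows another request.
		if err := limiter.Wait(context.Background()); err != nil {
			panic(err)
		}
	}
	// The first ~100 calls drain the burst; the rest proceed at ~50/s,
	// so 200 simulated calls take roughly 2 seconds instead of 4.
	fmt.Printf("200 simulated API calls took %v\n", time.Since(start))
}
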
@@ -79,7 +84,7 @@ var _ = Describe("Load capacity", func() {
 		// Terminating a namespace (deleting the remaining objects from it - which
 		// generally means events) can affect the current run. Thus we wait for all
 		// terminating namespace to be finally deleted before starting this test.
-		err := checkTestingNSDeletedExcept(c, ns)
+		err = checkTestingNSDeletedExcept(c, ns)
 		expectNoError(err)
 
 		expectNoError(resetMetrics(c))
@@ -107,7 +112,8 @@ var _ = Describe("Load capacity", func() {
 		itArg := testArg
 
 		It(name, func() {
-			configs = generateRCConfigs(itArg.podsPerNode*nodeCount, itArg.image, itArg.command, c, ns)
+			totalPods := itArg.podsPerNode * nodeCount
+			configs = generateRCConfigs(totalPods, itArg.image, itArg.command, c, ns)
 
 			// Simulate lifetime of RC:
 			//  * create with initial size
@@ -116,16 +122,28 @@ var _ = Describe("Load capacity", func() {
 			//  * delete it
 			//
 			// This will generate ~5 creations/deletions per second assuming:
-			//  - 300 small RCs each 5 pods
-			//  - 25 medium RCs each 30 pods
-			//  - 3 big RCs each 250 pods
-			createAllRC(configs)
-			// TODO add reseting latency metrics here, once it would be supported.
+			//  - X small RCs each 5 pods   [ 5 * X = totalPods / 2 ]
+			//  - Y medium RCs each 30 pods [ 30 * Y = totalPods / 4 ]
+			//  - Z big RCs each 250 pods   [ 250 * Z = totalPods / 4]
+
+			// We would like to spread creating replication controllers over time
+			// to make it possible to create/schedule them in the meantime.
+			// Currently we assume 5 pods/second average throughput.
+			// We may want to revisit it in the future.
+			creatingTime := time.Duration(totalPods/5) * time.Second
+			createAllRC(configs, creatingTime)
+
 			By("============================================================================")
 			scaleAllRC(configs)
 			By("============================================================================")
 			scaleAllRC(configs)
 			By("============================================================================")
+
+			// Cleanup all created replication controllers.
+			// Currently we assume 5 pods/second average deletion throughput.
+			// We may want to revisit it in the future.
+			deletingTime := time.Duration(totalPods/5) * time.Second
+			deleteAllRC(configs, deletingTime)
 		})
 	}
 })
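
To make the arithmetic in the new comments concrete: with 30 pods per node on a hypothetical 100-node cluster, totalPods is 3000, which yields X = 300 small, Y = 25 medium and Z = 3 big RCs (the same 300/25/3 mix the old comment hard-coded), and creatingTime becomes 3000/5 seconds, i.e. the 10 minutes that used to be a fixed constant. A standalone sketch of that derivation (variable names are illustrative; the actual split happens inside generateRCConfigs, which this diff does not touch):

package main

import (
	"fmt"
	"time"
)

func main() {
	// Example only: 30 pods per node on a 100-node cluster.
	totalPods := 30 * 100 // 3000

	// Half the pods in small RCs of 5 pods, a quarter in medium RCs of 30,
	// a quarter in big RCs of 250, mirroring the comment in the test.
	smallRCCount := totalPods / 2 / 5   // X = 300
	mediumRCCount := totalPods / 4 / 30 // Y = 25
	bigRCCount := totalPods / 4 / 250   // Z = 3

	// Spreading creation over totalPods/5 seconds targets ~5 pods/second.
	creatingTime := time.Duration(totalPods/5) * time.Second

	fmt.Println(smallRCCount, mediumRCCount, bigRCCount) // 300 25 3
	fmt.Println(creatingTime)                            // 10m0s
}
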
@@ -176,19 +194,18 @@ func sleepUpTo(d time.Duration) {
 	time.Sleep(time.Duration(rand.Int63n(d.Nanoseconds())))
 }
 
-func createAllRC(configs []*RCConfig) {
+func createAllRC(configs []*RCConfig, creatingTime time.Duration) {
 	var wg sync.WaitGroup
 	wg.Add(len(configs))
 	for _, config := range configs {
-		go createRC(&wg, config)
+		go createRC(&wg, config, creatingTime)
 	}
 	wg.Wait()
 }
 
-func createRC(wg *sync.WaitGroup, config *RCConfig) {
+func createRC(wg *sync.WaitGroup, config *RCConfig, creatingTime time.Duration) {
 	defer GinkgoRecover()
 	defer wg.Done()
-	creatingTime := 10 * time.Minute
 
 	sleepUpTo(creatingTime)
 	expectNoError(RunRC(*config), fmt.Sprintf("creating rc %s", config.Name))
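
createAllRC still fans out one goroutine per config, but each createRC now sleeps a uniformly random fraction of the caller-supplied creatingTime (via sleepUpTo) before calling RunRC, so creations arrive at a roughly constant average rate instead of all at once. A self-contained sketch of the same spreading pattern, with a print statement standing in for RunRC:

package main

import (
	"fmt"
	"math/rand"
	"sync"
	"time"
)

// sleepUpTo sleeps for a random duration in [0, d), as in the test.
func sleepUpTo(d time.Duration) {
	time.Sleep(time.Duration(rand.Int63n(d.Nanoseconds())))
}

func main() {
	const workers = 20
	window := 2 * time.Second // stands in for creatingTime

	var wg sync.WaitGroup
	wg.Add(workers)
	start := time.Now()
	for i := 0; i < workers; i++ {
		go func(id int) {
			defer wg.Done()
			// Each worker starts at a random point inside the window,
			// so on average the work is spread evenly across it.
			sleepUpTo(window)
			fmt.Printf("worker %d fired at %v\n", id, time.Since(start).Round(10*time.Millisecond))
		}(i)
	}
	wg.Wait()
}
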
@@ -223,19 +240,18 @@ func scaleRC(wg *sync.WaitGroup, config *RCConfig) {
 	expectNoError(err, fmt.Sprintf("listing pods from rc %v", config.Name))
 }
 
-func deleteAllRC(configs []*RCConfig) {
+func deleteAllRC(configs []*RCConfig, deletingTime time.Duration) {
 	var wg sync.WaitGroup
 	wg.Add(len(configs))
 	for _, config := range configs {
-		go deleteRC(&wg, config)
+		go deleteRC(&wg, config, deletingTime)
 	}
 	wg.Wait()
 }
 
-func deleteRC(wg *sync.WaitGroup, config *RCConfig) {
+func deleteRC(wg *sync.WaitGroup, config *RCConfig, deletingTime time.Duration) {
 	defer GinkgoRecover()
 	defer wg.Done()
-	deletingTime := 10 * time.Minute
 
 	sleepUpTo(deletingTime)
 	expectNoError(DeleteRC(config.Client, config.Namespace, config.Name), fmt.Sprintf("deleting rc %s", config.Name))
@@ -1115,11 +1115,7 @@ func loadConfig() (*client.Config, error) {
 	}
 }
 
-func loadClient() (*client.Client, error) {
-	config, err := loadConfig()
-	if err != nil {
-		return nil, fmt.Errorf("error creating client: %v", err.Error())
-	}
+func loadClientFromConfig(config *client.Config) (*client.Client, error) {
 	c, err := client.New(config)
 	if err != nil {
 		return nil, fmt.Errorf("error creating client: %v", err.Error())
@@ -1130,6 +1126,14 @@ func loadClient() (*client.Client, error) {
 	return c, nil
 }
 
+func loadClient() (*client.Client, error) {
+	config, err := loadConfig()
+	if err != nil {
+		return nil, fmt.Errorf("error creating client: %v", err.Error())
+	}
+	return loadClientFromConfig(config)
+}
+
 // randomSuffix provides a random string to append to pods,services,rcs.
 // TODO: Allow service names to have the same form as names
 //       for pods and replication controllers so we don't
 