Fix deadlock in TestPostgreSQLBackend (#6884)

Make lock2's retryInterval smaller so that it grabs the lock as soon as lock1's renewer fails to renew in time. Fix the logic that tests whether lock1's leader channel gets closed: we don't need a goroutine, and
the old logic was broken in that, if we timed out, we would never write to the blocking channel we then try to read from, so the test deadlocked. Moreover, the timeout was wrong.
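
To make the deadlock concrete, here is a minimal standalone sketch (not the Vault test itself; the channel names merely mirror it) of the old pattern next to the plain select it was replaced with. If the timeout fires first, the helper goroutine returns without ever closing `blocking`, so a bare `<-blocking` in the test body can never complete:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Stand-in for lock1's leader channel; it is never closed here,
	// which is exactly the case that used to hang the test.
	leaderCh := make(chan struct{})

	// Old pattern (broken): a helper goroutine relays the outcome over
	// `blocking`, but on timeout it just returns, so nothing ever closes
	// `blocking`. The real test then blocked forever on `<-blocking`;
	// this sketch adds an outer timeout only so it still terminates.
	leaderChClosed := false
	blocking := make(chan struct{})
	go func() {
		select {
		case <-time.After(100 * time.Millisecond):
			return // bug: `blocking` is never closed on this path
		case <-leaderCh:
			leaderChClosed = true
			close(blocking)
		case <-blocking:
			return
		}
	}()
	select {
	case <-blocking:
		fmt.Println("old pattern: leader channel closed =", leaderChClosed)
	case <-time.After(200 * time.Millisecond):
		fmt.Println("old pattern: <-blocking never returns; the test hung here")
	}

	// New pattern (what the commit switches to): select directly on the
	// leader channel with a timeout. No helper goroutine, no deadlock.
	select {
	case <-time.After(100 * time.Millisecond):
		fmt.Println("new pattern: timed out; leader channel was not closed")
	case <-leaderCh:
		fmt.Println("new pattern: leader channel closed")
	}
}
```

In the diff below, the same idea is applied directly in testPostgresSQLLockTTL, with the timeout based on longRenewInterval rather than watchInterval.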
ncabatoff, 2019-06-14 12:59:24 -04:00, committed by GitHub
parent c6c79cab84
commit 47d4e5b1f6


@@ -107,103 +107,92 @@ func testPostgresSQLLockTTL(t *testing.T, ha physical.HABackend) {
 	// Set much smaller lock times to speed up the test.
 	lockTTL := 3
 	renewInterval := time.Second * 1
-	watchInterval := time.Second * 1
+	retryInterval := time.Second * 1
+	longRenewInterval := time.Duration(lockTTL*2) * time.Second
+	lockkey := "postgresttl"
+
+	var leaderCh <-chan struct{}
 
 	// Get the lock
-	origLock, err := ha.LockWith("dynamodbttl", "bar")
+	origLock, err := ha.LockWith(lockkey, "bar")
 	if err != nil {
 		t.Fatalf("err: %v", err)
 	}
-	// set the first lock renew period to double the expected TTL.
-	lock := origLock.(*PostgreSQLLock)
-	lock.renewInterval = time.Duration(lockTTL*2) * time.Second
-	lock.ttlSeconds = lockTTL
-	// lock.retryInterval = watchInterval
-
-	// Attempt to lock
-	leaderCh, err := lock.Lock(nil)
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if leaderCh == nil {
-		t.Fatalf("failed to get leader ch")
-	}
-
-	// Check the value
-	held, val, err := lock.Value()
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if !held {
-		t.Fatalf("should be held")
-	}
-	if val != "bar" {
-		t.Fatalf("bad value: %v", err)
-	}
+	{
+		// set the first lock renew period to double the expected TTL.
+		lock := origLock.(*PostgreSQLLock)
+		lock.renewInterval = longRenewInterval
+		lock.ttlSeconds = lockTTL
+
+		// Attempt to lock
+		leaderCh, err = lock.Lock(nil)
+		if err != nil {
+			t.Fatalf("err: %v", err)
+		}
+		if leaderCh == nil {
+			t.Fatalf("failed to get leader ch")
+		}
+
+		// Check the value
+		held, val, err := lock.Value()
+		if err != nil {
+			t.Fatalf("err: %v", err)
+		}
+		if !held {
+			t.Fatalf("should be held")
+		}
+		if val != "bar" {
+			t.Fatalf("bad value: %v", val)
+		}
+	}
 
 	// Second acquisition should succeed because the first lock should
 	// not renew within the 3 sec TTL.
-	origLock2, err := ha.LockWith("dynamodbttl", "baz")
+	origLock2, err := ha.LockWith(lockkey, "baz")
 	if err != nil {
 		t.Fatalf("err: %v", err)
 	}
-	lock2 := origLock2.(*PostgreSQLLock)
-	lock2.renewInterval = renewInterval
-	lock2.ttlSeconds = lockTTL
-	// lock2.retryInterval = watchInterval
-
-	// Cancel attempt in 6 sec so as not to block unit tests forever
-	stopCh := make(chan struct{})
-	time.AfterFunc(time.Duration(lockTTL*2)*time.Second, func() {
-		close(stopCh)
-	})
-
-	// Attempt to lock should work
-	leaderCh2, err := lock2.Lock(stopCh)
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if leaderCh2 == nil {
-		t.Fatalf("should get leader ch")
-	}
-
-	// Check the value
-	held, val, err = lock2.Value()
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if !held {
-		t.Fatalf("should be held")
-	}
-	if val != "baz" {
-		t.Fatalf("bad value: %v", err)
-	}
+	{
+		lock2 := origLock2.(*PostgreSQLLock)
+		lock2.renewInterval = renewInterval
+		lock2.ttlSeconds = lockTTL
+		lock2.retryInterval = retryInterval
+
+		// Cancel attempt in 6 sec so as not to block unit tests forever
+		stopCh := make(chan struct{})
+		time.AfterFunc(time.Duration(lockTTL*2)*time.Second, func() {
+			close(stopCh)
+		})
+
+		// Attempt to lock should work
+		leaderCh2, err := lock2.Lock(stopCh)
+		if err != nil {
+			t.Fatalf("err: %v", err)
+		}
+		if leaderCh2 == nil {
+			t.Fatalf("should get leader ch")
+		}
+		defer lock2.Unlock()
+
+		// Check the value
+		held, val, err := lock2.Value()
+		if err != nil {
+			t.Fatalf("err: %v", err)
+		}
+		if !held {
+			t.Fatalf("should be held")
+		}
+		if val != "baz" {
+			t.Fatalf("bad value: %v", val)
+		}
+	}
 
 	// The first lock should have lost the leader channel
-	leaderChClosed := false
-	blocking := make(chan struct{})
-	// Attempt to read from the leader or the blocking channel, which ever one
-	// happens first.
-	go func() {
-		select {
-		case <-time.After(watchInterval * 3):
-			return
-		case <-leaderCh:
-			leaderChClosed = true
-			close(blocking)
-		case <-blocking:
-			return
-		}
-	}()
-
-	<-blocking
-	if !leaderChClosed {
+	select {
+	case <-time.After(longRenewInterval * 2):
 		t.Fatalf("original lock did not have its leader channel closed.")
+	case <-leaderCh:
 	}
-
-	// Cleanup
-	lock2.Unlock()
 }
 
 // Verify that once Unlock is called, we don't keep trying to renew the original
@@ -237,7 +226,7 @@ func testPostgresSQLLockRenewal(t *testing.T, ha physical.HABackend) {
 		t.Fatalf("should be held")
 	}
 	if val != "bar" {
-		t.Fatalf("bad value: %v", err)
+		t.Fatalf("bad value: %v", val)
 	}
 
 	// Release the lock, which will delete the stored item
@@ -280,7 +269,7 @@ func testPostgresSQLLockRenewal(t *testing.T, ha physical.HABackend) {
 		t.Fatalf("should be held")
 	}
 	if val != "baz" {
-		t.Fatalf("bad value: %v", err)
+		t.Fatalf("bad value: %v", val)
 	}
 
 	// Cleanup