Fix deadlock in TestPostgreSQLBackend (#6884)

Make lock2's retryInterval smaller so that it grabs the lock as soon as lock1's renewer fails to renew in time. Fix the logic that tests whether lock1's leader channel gets closed: we don't need a goroutine, and
the old logic was broken in that, if we timed out, we would never write to the blocking channel we then try to read from, so the test deadlocked. Moreover, the timeout was wrong.
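
To make the deadlock concrete, here is a minimal standalone sketch (not the Vault test itself; the channel names merely mirror it) of the old pattern next to the plain select it was replaced with. If the timeout fires first, the helper goroutine returns without ever closing `blocking`, so a bare `<-blocking` in the test body can never complete:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Stand-in for lock1's leader channel; it is never closed here,
	// which is exactly the case that used to hang the test.
	leaderCh := make(chan struct{})

	// Old pattern (broken): a helper goroutine relays the outcome over
	// `blocking`, but on timeout it just returns, so nothing ever closes
	// `blocking`. The real test then blocked forever on `<-blocking`;
	// this sketch adds an outer timeout only so it still terminates.
	leaderChClosed := false
	blocking := make(chan struct{})
	go func() {
		select {
		case <-time.After(100 * time.Millisecond):
			return // bug: `blocking` is never closed on this path
		case <-leaderCh:
			leaderChClosed = true
			close(blocking)
		case <-blocking:
			return
		}
	}()
	select {
	case <-blocking:
		fmt.Println("old pattern: leader channel closed =", leaderChClosed)
	case <-time.After(200 * time.Millisecond):
		fmt.Println("old pattern: <-blocking never returns; the test hung here")
	}

	// New pattern (what the commit switches to): select directly on the
	// leader channel with a timeout. No helper goroutine, no deadlock.
	select {
	case <-time.After(100 * time.Millisecond):
		fmt.Println("new pattern: timed out; leader channel was not closed")
	case <-leaderCh:
		fmt.Println("new pattern: leader channel closed")
	}
}
```

In the diff below, the same idea is applied directly in testPostgresSQLLockTTL, with the timeout based on longRenewInterval rather than watchInterval.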
ncabatoff, 2019-06-14 12:59:24 -04:00, committed by GitHub
parent c6c79cab84
commit 47d4e5b1f6


@@ -107,103 +107,92 @@ func testPostgresSQLLockTTL(t *testing.T, ha physical.HABackend) {
 	// Set much smaller lock times to speed up the test.
 	lockTTL := 3
 	renewInterval := time.Second * 1
-	watchInterval := time.Second * 1
+	retryInterval := time.Second * 1
+	longRenewInterval := time.Duration(lockTTL*2) * time.Second
+	lockkey := "postgresttl"
+
+	var leaderCh <-chan struct{}
 
 	// Get the lock
-	origLock, err := ha.LockWith("dynamodbttl", "bar")
+	origLock, err := ha.LockWith(lockkey, "bar")
 	if err != nil {
 		t.Fatalf("err: %v", err)
 	}
-	// set the first lock renew period to double the expected TTL.
-	lock := origLock.(*PostgreSQLLock)
-	lock.renewInterval = time.Duration(lockTTL*2) * time.Second
-	lock.ttlSeconds = lockTTL
-	// lock.retryInterval = watchInterval
-
-	// Attempt to lock
-	leaderCh, err := lock.Lock(nil)
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if leaderCh == nil {
-		t.Fatalf("failed to get leader ch")
-	}
-
-	// Check the value
-	held, val, err := lock.Value()
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if !held {
-		t.Fatalf("should be held")
-	}
-	if val != "bar" {
-		t.Fatalf("bad value: %v", err)
-	}
+	{
+		// set the first lock renew period to double the expected TTL.
+		lock := origLock.(*PostgreSQLLock)
+		lock.renewInterval = longRenewInterval
+		lock.ttlSeconds = lockTTL
+
+		// Attempt to lock
+		leaderCh, err = lock.Lock(nil)
+		if err != nil {
+			t.Fatalf("err: %v", err)
+		}
+		if leaderCh == nil {
+			t.Fatalf("failed to get leader ch")
+		}
+
+		// Check the value
+		held, val, err := lock.Value()
+		if err != nil {
+			t.Fatalf("err: %v", err)
+		}
+		if !held {
+			t.Fatalf("should be held")
+		}
+		if val != "bar" {
+			t.Fatalf("bad value: %v", val)
+		}
+	}
 
 	// Second acquisition should succeed because the first lock should
 	// not renew within the 3 sec TTL.
-	origLock2, err := ha.LockWith("dynamodbttl", "baz")
+	origLock2, err := ha.LockWith(lockkey, "baz")
 	if err != nil {
 		t.Fatalf("err: %v", err)
 	}
-	lock2 := origLock2.(*PostgreSQLLock)
-	lock2.renewInterval = renewInterval
-	lock2.ttlSeconds = lockTTL
-	// lock2.retryInterval = watchInterval
-
-	// Cancel attempt in 6 sec so as not to block unit tests forever
-	stopCh := make(chan struct{})
-	time.AfterFunc(time.Duration(lockTTL*2)*time.Second, func() {
-		close(stopCh)
-	})
-
-	// Attempt to lock should work
-	leaderCh2, err := lock2.Lock(stopCh)
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if leaderCh2 == nil {
-		t.Fatalf("should get leader ch")
-	}
-
-	// Check the value
-	held, val, err = lock2.Value()
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if !held {
-		t.Fatalf("should be held")
-	}
-	if val != "baz" {
-		t.Fatalf("bad value: %v", err)
-	}
+	{
+		lock2 := origLock2.(*PostgreSQLLock)
+		lock2.renewInterval = renewInterval
+		lock2.ttlSeconds = lockTTL
+		lock2.retryInterval = retryInterval
+
+		// Cancel attempt in 6 sec so as not to block unit tests forever
+		stopCh := make(chan struct{})
+		time.AfterFunc(time.Duration(lockTTL*2)*time.Second, func() {
+			close(stopCh)
+		})
+
+		// Attempt to lock should work
+		leaderCh2, err := lock2.Lock(stopCh)
+		if err != nil {
+			t.Fatalf("err: %v", err)
+		}
+		if leaderCh2 == nil {
+			t.Fatalf("should get leader ch")
+		}
+		defer lock2.Unlock()
+
+		// Check the value
+		held, val, err := lock2.Value()
+		if err != nil {
+			t.Fatalf("err: %v", err)
+		}
+		if !held {
+			t.Fatalf("should be held")
+		}
+		if val != "baz" {
+			t.Fatalf("bad value: %v", val)
+		}
+	}
 
 	// The first lock should have lost the leader channel
-	leaderChClosed := false
-	blocking := make(chan struct{})
-	// Attempt to read from the leader or the blocking channel, which ever one
-	// happens first.
-	go func() {
-		select {
-		case <-time.After(watchInterval * 3):
-			return
-		case <-leaderCh:
-			leaderChClosed = true
-			close(blocking)
-		case <-blocking:
-			return
-		}
-	}()
-
-	<-blocking
-	if !leaderChClosed {
+	select {
+	case <-time.After(longRenewInterval * 2):
 		t.Fatalf("original lock did not have its leader channel closed.")
+	case <-leaderCh:
 	}
-
-	// Cleanup
-	lock2.Unlock()
 }
 
 // Verify that once Unlock is called, we don't keep trying to renew the original
@@ -237,7 +226,7 @@ func testPostgresSQLLockRenewal(t *testing.T, ha physical.HABackend) {
 		t.Fatalf("should be held")
 	}
 	if val != "bar" {
-		t.Fatalf("bad value: %v", err)
+		t.Fatalf("bad value: %v", val)
 	}
 
 	// Release the lock, which will delete the stored item
@@ -280,7 +269,7 @@ func testPostgresSQLLockRenewal(t *testing.T, ha physical.HABackend) {
 		t.Fatalf("should be held")
 	}
 	if val != "baz" {
-		t.Fatalf("bad value: %v", err)
+		t.Fatalf("bad value: %v", val)
 	}
 
 	// Cleanup