Sync replication slots and verify sysid after coming out of pause (#678)

Fixes https://github.com/zalando/patroni/issues/568
and https://github.com/zalando/patroni/issues/674
This commit is contained in:
Alexander Kukushkin
2018-05-18 12:18:49 +02:00
committed by GitHub
parent 4ce539ba1b
commit 856552bd61
3 changed files with 23 additions and 0 deletions

View File

@@ -57,6 +57,7 @@ class Ha(object):
self.dcs = patroni.dcs
self.cluster = None
self.old_cluster = None
self._was_paused = False
self._leader_timeline = None
self.recovering = False
self._post_bootstrap_task = None
@@ -1047,6 +1048,11 @@ class Ha(object):
if self.is_paused():
self.watchdog.disable()
self._was_paused = True
else:
if self._was_paused:
self.state_handler.schedule_sanity_checks_after_pause()
self._was_paused = False
if not self.cluster.has_member(self.state_handler.name):
self.touch_member()

View File

@@ -1848,3 +1848,12 @@ $$""".format(name, ' '.join(options)), name, password, password)
with self._cancellable_lock:
if self._cancellable is not None and self._cancellable.returncode is None:
self._cancellable.kill()
def schedule_sanity_checks_after_pause(self):
    """Schedule the checks that have to run after coming out of pause.

    After coming out of pause we have to:

    1. sync replication slots, because it might happen that slots were removed
    2. get new 'Database system identifier' to make sure that it wasn't changed

    Nothing is checked here; the method only updates two attributes on
    ``self`` and returns ``None``.
    """
    # Request a slot reload only when replication slots are in use.
    self._schedule_load_slots = self.use_slots
    # Drop the cached system identifier.
    # NOTE(review): presumably it is re-read on next access -- confirm in the accessor.
    self._sysid = None

View File

@@ -915,3 +915,11 @@ class TestHa(unittest.TestCase):
self.p.is_leader = false
self.ha.run_cycle()
exit_mock.assert_called_once_with(1)
def test_after_pause(self):
    """Run one HA cycle while paused and one right after the pause is lifted.

    The node is set up as the leader holding the lock. The first cycle is
    expected to report the 'PAUSE: ' prefixed message, the second one the
    plain message.
    """
    # NOTE(review): `true` / `false` are helpers defined elsewhere in this test
    # module, presumably callables returning a constant -- confirm there.
    self.ha.has_lock = true
    self.ha.cluster.is_unlocked = false
    self.ha.is_paused = true
    # assertEqual instead of assertEquals: the alias is deprecated and was
    # removed from unittest in Python 3.12.
    self.assertEqual(self.ha.run_cycle(), 'PAUSE: no action. i am the leader with the lock')
    self.ha.is_paused = false
    self.assertEqual(self.ha.run_cycle(), 'no action. i am the leader with the lock')