mirror of
https://github.com/outbackdingo/patroni.git
synced 2026-01-27 10:20:10 +00:00
Detect and solve inconsistency between /sync and actual sync nodes (#2877)
Patroni is changing `synchronous_standby_names` and the `/sync` key in a very specific order, first we add nodes to `synchronous_standby_names` and only after, when they are recognized as synchronous they are added to the `/sync` key. When removing nodes the order is different: they are first removed from the `/sync` key and only after that from the `synchronous_standby_names`. As a result Patroni expects that either actual synchronous nodes will match with the nodes listed in the `/sync` key or that new candidates to synchronous nodes will not match with nodes listed in the `/sync` key. In case if `synchronous_standby_names` was removed from the `postgresql.conf`, manually, or due the the bug (#2876), the state becomes inconsistent because of the wrong order of updates. To solve inconsistent state we introduce additional checks and will update the `/sync` key with actual names of synchronous nodes (usually empty set).
This commit is contained in:
@@ -654,6 +654,14 @@ class Ha(object):
|
||||
current = CaseInsensitiveSet(sync.members)
|
||||
picked, allow_promote = self.state_handler.sync_handler.current_state(self.cluster)
|
||||
|
||||
if picked == current and current != allow_promote:
|
||||
logger.warning('Inconsistent state between synchronous_standby_names = %s and /sync = %s key '
|
||||
'detected, updating synchronous replication key...', list(allow_promote), list(current))
|
||||
sync = self.dcs.write_sync_state(self.state_handler.name, allow_promote, version=sync.version)
|
||||
if not sync:
|
||||
return logger.warning("Updating sync state failed")
|
||||
current = CaseInsensitiveSet(sync.members)
|
||||
|
||||
if picked != current:
|
||||
# update synchronous standby list in dcs temporarily to point to common nodes in current and picked
|
||||
sync_common = current & allow_promote
|
||||
|
||||
@@ -1313,6 +1313,24 @@ class TestHa(PostgresInit):
|
||||
self.ha.run_cycle()
|
||||
self.assertEqual(mock_logger.call_args[0][0], 'Updating sync state failed')
|
||||
|
||||
@patch.object(Cluster, 'is_unlocked', Mock(return_value=False))
|
||||
def test_inconsistent_synchronous_state(self):
|
||||
self.ha.is_synchronous_mode = true
|
||||
self.ha.has_lock = true
|
||||
self.p.name = 'leader'
|
||||
self.ha.cluster = get_cluster_initialized_without_leader(sync=('leader', 'a'))
|
||||
self.p.sync_handler.current_state = Mock(return_value=(CaseInsensitiveSet('a'), CaseInsensitiveSet()))
|
||||
self.ha.dcs.write_sync_state = Mock(return_value=SyncState.empty())
|
||||
mock_set_sync = self.p.sync_handler.set_synchronous_standby_names = Mock()
|
||||
with patch('patroni.ha.logger.warning') as mock_logger:
|
||||
self.ha.run_cycle()
|
||||
mock_set_sync.assert_called_once()
|
||||
self.assertTrue(mock_logger.call_args_list[0][0][0].startswith('Inconsistent state between '))
|
||||
self.ha.dcs.write_sync_state = Mock(return_value=None)
|
||||
with patch('patroni.ha.logger.warning') as mock_logger:
|
||||
self.ha.run_cycle()
|
||||
self.assertEqual(mock_logger.call_args[0][0], 'Updating sync state failed')
|
||||
|
||||
def test_effective_tags(self):
|
||||
self.ha._disable_sync = True
|
||||
self.assertEqual(self.ha.get_effective_tags(), {'foo': 'bar', 'nosync': True})
|
||||
|
||||
Reference in New Issue
Block a user