mirror of
https://github.com/outbackdingo/patroni.git
synced 2026-01-27 10:20:10 +00:00
Reset failsafe state on promote (#2803)
Consider the following scenario (with failsafe_mode enabled): 0. node1 (primary) - node2 (replica). 1. Stop all etcd nodes; wait ttl seconds; start all etcd nodes (node2's failsafe state will contain the info about node1). 2. Switch over to node2 (node2's failsafe state still contains the info about node1). 3. Stop all etcd nodes; wait ttl seconds; start all etcd nodes. 4. node2 will demote itself because it considers node1 to be the primary. Resetting the failsafe state when running as a primary fixes the issue.
This commit is contained in:
committed by
Alexander Kukushkin
parent
84aac437c1
commit
783112385f
@@ -83,11 +83,7 @@ class Failsafe(object):
|
||||
def __init__(self, dcs: AbstractDCS) -> None:
|
||||
self._lock = RLock()
|
||||
self._dcs = dcs
|
||||
self._last_update = 0
|
||||
self._name = None
|
||||
self._conn_url = None
|
||||
self._api_url = None
|
||||
self._slots = None
|
||||
self._reset_state()
|
||||
|
||||
def update(self, data: Dict[str, Any]) -> None:
|
||||
with self._lock:
|
||||
@@ -97,6 +93,13 @@ class Failsafe(object):
|
||||
self._api_url = data['api_url']
|
||||
self._slots = data.get('slots')
|
||||
|
||||
def _reset_state(self) -> None:
|
||||
self._last_update = 0
|
||||
self._name = None
|
||||
self._conn_url = None
|
||||
self._api_url = None
|
||||
self._slots = None
|
||||
|
||||
@property
|
||||
def leader(self) -> Optional[Leader]:
|
||||
with self._lock:
|
||||
@@ -130,6 +133,8 @@ class Failsafe(object):
|
||||
def set_is_active(self, value: float) -> None:
|
||||
with self._lock:
|
||||
self._last_update = value
|
||||
if not value:
|
||||
self._reset_state()
|
||||
|
||||
|
||||
class Ha(object):
|
||||
@@ -777,6 +782,9 @@ class Ha(object):
|
||||
self.state_handler.sync_handler.set_synchronous_standby_names(
|
||||
CaseInsensitiveSet('*') if self.global_config.is_synchronous_mode_strict else CaseInsensitiveSet())
|
||||
if self.state_handler.role not in ('master', 'promoted', 'primary'):
|
||||
# reset failsafe state when promote
|
||||
self._failsafe.set_is_active(0)
|
||||
|
||||
def before_promote():
|
||||
self.notify_citus_coordinator('before_promote')
|
||||
|
||||
|
||||
Reference in New Issue
Block a user