From a0a2da238e419ee79a91666cfceec0063ba5cd0a Mon Sep 17 00:00:00 2001
From: Alexander Kukushkin
Date: Tue, 2 Apr 2019 16:49:21 +0200
Subject: [PATCH] Couple of minor improvements (#1019)

1. Fix race condition on shutdown. It is very annoying when you cancel behave tests but postgres remains running.
2. Dump pg_controldata output to logs when "recovering" stopped postgres. It will help to investigate some annoying issues.
---
 patroni/__init__.py | 19 +++++++++++++++----
 patroni/ha.py       |  1 +
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/patroni/__init__.py b/patroni/__init__.py
index 72e11145..c1e06073 100644
--- a/patroni/__init__.py
+++ b/patroni/__init__.py
@@ -85,9 +85,10 @@ class Patroni(object):
         self._received_sighup = True
 
     def sigterm_handler(self, *args):
-        if not self._received_sigterm:
-            self._received_sigterm = True
-            sys.exit()
+        with self._sigterm_lock:
+            if not self._received_sigterm:
+                self._received_sigterm = True
+                sys.exit()
 
     @property
     def noloadbalance(self):
@@ -106,11 +107,16 @@ class Patroni(object):
         elif self.ha.watch(nap_time):
             self.next_run = time.time()
 
+    @property
+    def received_sigterm(self):
+        with self._sigterm_lock:
+            return self._received_sigterm
+
     def run(self):
         self.api.start()
         self.next_run = time.time()
 
-        while not self._received_sigterm:
+        while not self.received_sigterm:
             if self._received_sighup:
                 self._received_sighup = False
                 if self.config.reload_local_configuration():
@@ -128,13 +134,18 @@ class Patroni(object):
             self.schedule_next_run()
 
     def setup_signal_handlers(self):
+        from threading import Lock
+
         self._received_sighup = False
+        self._sigterm_lock = Lock()
         self._received_sigterm = False
         if os.name != 'nt':
             signal.signal(signal.SIGHUP, self.sighup_handler)
         signal.signal(signal.SIGTERM, self.sigterm_handler)
 
     def shutdown(self):
+        with self._sigterm_lock:
+            self._received_sigterm = True
         try:
             self.api.shutdown()
         except Exception:
diff --git a/patroni/ha.py b/patroni/ha.py
index c242abb1..826e9759 100644
--- a/patroni/ha.py
+++ b/patroni/ha.py
@@ -305,6 +305,7 @@ class Ha(object):
             timeout = None
 
         data = self.state_handler.controldata()
+        logger.info('pg_controldata:\n%s\n', '\n'.join(' {0}: {1}'.format(k, v) for k, v in data.items()))
         if data.get('Database cluster state') in ('in production', 'shutting down', 'in crash recovery') and \
                 not self._crash_recovery_executed and (self.cluster.is_unlocked() or self.state_handler.can_rewind):
             self._crash_recovery_executed = True