From 30aa355eb51116c97a2a3dbaf68bc512d9af5aaa Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Fri, 15 May 2020 16:14:25 +0200 Subject: [PATCH] Shorten and beautify history log output (#1526) when Patroni is trying to figure out the necessity of pg_rewind it could write the content history file from the primary into the log. The history file is growing with every failover/switchover and eventually starts taking too many lines in the log, most of them are not so much useful. Instead of showing the raw data, we will show only 3 lines before the current replica timeline and 2 lines after. --- patroni/postgresql/rewind.py | 33 +++++++++++++++++++++++++++++---- tests/__init__.py | 2 +- tests/test_postgresql.py | 2 +- tests/test_rewind.py | 9 ++++++++- 4 files changed, 39 insertions(+), 7 deletions(-) diff --git a/patroni/postgresql/rewind.py b/patroni/postgresql/rewind.py index 268e6699..b30458b8 100644 --- a/patroni/postgresql/rewind.py +++ b/patroni/postgresql/rewind.py @@ -15,6 +15,11 @@ REWIND_STATUS = type('Enum', (), {'INITIAL': 0, 'CHECKPOINT': 1, 'CHECK': 2, 'NE 'NOT_NEED': 4, 'SUCCESS': 5, 'FAILED': 6}) +def format_lsn(lsn, full=False): + template = '{0:X}/{1:08X}' if full else '{0:X}/{1:X}' + return template.format(lsn >> 32, lsn & 0xFFFFFFFF) + + class Rewind(object): def __init__(self, postgresql): @@ -88,6 +93,24 @@ class Rewind(object): logger.info('Local timeline=%s lsn=%s', timeline, lsn) return timeline, lsn + @staticmethod + def _log_master_history(history, i): + start = max(0, i - 3) + end = None if i + 4 >= len(history) else i + 2 + history_show = [] + + def format_history_line(l): + return '{0}\t{1}\t{2}'.format(l[0], format_lsn(l[1]), l[2]) + + for line in history[start:end]: + history_show.append(format_history_line(line)) + + if line != history[-1]: + history_show.append('...') + history_show.append(format_history_line(history[-1])) + + logger.info('master: history=%s', '\n'.join(history_show)) + def _check_timeline_and_lsn(self, leader): local_timeline, local_lsn = self._get_local_timeline_lsn() if local_timeline is None or local_lsn is None: @@ -108,17 +131,18 @@ class Rewind(object): logger.info('master_timeline=%s', master_timeline) if local_timeline > master_timeline: # Not always supported by pg_rewind need_rewind = True + elif local_timeline == master_timeline: + need_rewind = False elif master_timeline > 1: cur.execute('TIMELINE_HISTORY %s', (master_timeline,)) history = bytes(cur.fetchone()[1]).decode('utf-8') - logger.info('master: history=%s', history) - else: # local_timeline == master_timeline == 1 - need_rewind = False + logger.debug('master: history=%s', history) except Exception: return logger.exception('Exception when working with master via replication connection') if history is not None: - for parent_timeline, switchpoint, _ in parse_history(history): + history = list(parse_history(history)) + for i, (parent_timeline, switchpoint, _) in enumerate(history): if parent_timeline == local_timeline: try: need_rewind = parse_lsn(local_lsn) >= switchpoint @@ -127,6 +151,7 @@ class Rewind(object): break elif parent_timeline > local_timeline: break + self._log_master_history(history, i) self._state = need_rewind and REWIND_STATUS.NEED or REWIND_STATUS.NOT_NEED diff --git a/tests/__init__.py b/tests/__init__.py index 8a075d61..2bf33b3d 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -106,7 +106,7 @@ class MockCursor(object): ('autovacuum', 'on', None, 'bool', 'sighup'), ('unix_socket_directories', '/tmp', None, 'string', 'postmaster')] elif sql.startswith('IDENTIFY_SYSTEM'): - self.results = [('1', 2, '0/402EEC0', '')] + self.results = [('1', 3, '0/402EEC0', '')] elif sql.startswith('SELECT isdir, modification'): self.results = [(False, datetime.datetime.now())] elif sql.startswith('SELECT pg_catalog.pg_read_file'): diff --git a/tests/test_postgresql.py b/tests/test_postgresql.py index 858b6f2a..144cd976 100644 --- a/tests/test_postgresql.py +++ b/tests/test_postgresql.py @@ -711,7 +711,7 @@ class TestPostgresql(BaseTestPostgresql): self.assertTrue(self.p.fix_cluster_state()) def test_replica_cached_timeline(self): - self.assertEqual(self.p.replica_cached_timeline(1), 2) + self.assertEqual(self.p.replica_cached_timeline(2), 3) def test_get_master_timeline(self): self.assertEqual(self.p.get_master_timeline(), 1) diff --git a/tests/test_rewind.py b/tests/test_rewind.py index 272eb79a..810cfec9 100644 --- a/tests/test_rewind.py +++ b/tests/test_rewind.py @@ -85,6 +85,13 @@ class TestRewind(BaseTestPostgresql): with patch.object(Postgresql, 'is_running', Mock(return_value=True)): self.r.execute(self.leader) + @patch('patroni.postgresql.rewind.logger.info') + def test__log_master_history(self, mock_logger): + history = [[n, n, ''] for n in range(1, 10)] + self.r._log_master_history(history, 1) + expected = '\n'.join(['{0}\t0/{0}\t'.format(n) for n in range(1, 4)] + ['...', '9\t0/9\t']) + self.assertEqual(mock_logger.call_args[0][1], expected) + @patch.object(Postgresql, 'start', Mock()) @patch.object(Rewind, 'can_rewind', PropertyMock(return_value=True)) @patch.object(Rewind, '_get_local_timeline_lsn', Mock(return_value=(2, '40159C1'))) @@ -101,7 +108,7 @@ class TestRewind(BaseTestPostgresql): with patch('psycopg2.connect', Mock(side_effect=Exception)): self.assertFalse(self.r.rewind_or_reinitialize_needed_and_possible(self.leader)) self.r.trigger_check_diverged_lsn() - with patch.object(MockCursor, 'fetchone', Mock(side_effect=[('', 2, '0/0'), ('', b'3\t0/40159C0\tn\n')])): + with patch.object(MockCursor, 'fetchone', Mock(side_effect=[('', 3, '0/0'), ('', b'3\t0/40159C0\tn\n')])): self.assertFalse(self.r.rewind_or_reinitialize_needed_and_possible(self.leader)) self.r.trigger_check_diverged_lsn() with patch.object(MockCursor, 'fetchone', Mock(return_value=('', 1, '0/0'))):