Shorten and beautify history log output (#1526)

when Patroni is trying to figure out the necessity of pg_rewind it could write the content history file from the primary into the log. The history file is growing with every failover/switchover and eventually starts taking too many lines in the log, most of them are not so much useful.
Instead of showing the raw data, we will show only 3 lines before the current replica timeline and 2 lines after.
This commit is contained in:
Alexander Kukushkin
2020-05-15 16:14:25 +02:00
committed by GitHub
parent 7cf0b753ab
commit 30aa355eb5
4 changed files with 39 additions and 7 deletions

View File

@@ -15,6 +15,11 @@ REWIND_STATUS = type('Enum', (), {'INITIAL': 0, 'CHECKPOINT': 1, 'CHECK': 2, 'NE
'NOT_NEED': 4, 'SUCCESS': 5, 'FAILED': 6})
def format_lsn(lsn, full=False):
template = '{0:X}/{1:08X}' if full else '{0:X}/{1:X}'
return template.format(lsn >> 32, lsn & 0xFFFFFFFF)
class Rewind(object):
def __init__(self, postgresql):
@@ -88,6 +93,24 @@ class Rewind(object):
logger.info('Local timeline=%s lsn=%s', timeline, lsn)
return timeline, lsn
@staticmethod
def _log_master_history(history, i):
start = max(0, i - 3)
end = None if i + 4 >= len(history) else i + 2
history_show = []
def format_history_line(l):
return '{0}\t{1}\t{2}'.format(l[0], format_lsn(l[1]), l[2])
for line in history[start:end]:
history_show.append(format_history_line(line))
if line != history[-1]:
history_show.append('...')
history_show.append(format_history_line(history[-1]))
logger.info('master: history=%s', '\n'.join(history_show))
def _check_timeline_and_lsn(self, leader):
local_timeline, local_lsn = self._get_local_timeline_lsn()
if local_timeline is None or local_lsn is None:
@@ -108,17 +131,18 @@ class Rewind(object):
logger.info('master_timeline=%s', master_timeline)
if local_timeline > master_timeline: # Not always supported by pg_rewind
need_rewind = True
elif local_timeline == master_timeline:
need_rewind = False
elif master_timeline > 1:
cur.execute('TIMELINE_HISTORY %s', (master_timeline,))
history = bytes(cur.fetchone()[1]).decode('utf-8')
logger.info('master: history=%s', history)
else: # local_timeline == master_timeline == 1
need_rewind = False
logger.debug('master: history=%s', history)
except Exception:
return logger.exception('Exception when working with master via replication connection')
if history is not None:
for parent_timeline, switchpoint, _ in parse_history(history):
history = list(parse_history(history))
for i, (parent_timeline, switchpoint, _) in enumerate(history):
if parent_timeline == local_timeline:
try:
need_rewind = parse_lsn(local_lsn) >= switchpoint
@@ -127,6 +151,7 @@ class Rewind(object):
break
elif parent_timeline > local_timeline:
break
self._log_master_history(history, i)
self._state = need_rewind and REWIND_STATUS.NEED or REWIND_STATUS.NOT_NEED

View File

@@ -106,7 +106,7 @@ class MockCursor(object):
('autovacuum', 'on', None, 'bool', 'sighup'),
('unix_socket_directories', '/tmp', None, 'string', 'postmaster')]
elif sql.startswith('IDENTIFY_SYSTEM'):
self.results = [('1', 2, '0/402EEC0', '')]
self.results = [('1', 3, '0/402EEC0', '')]
elif sql.startswith('SELECT isdir, modification'):
self.results = [(False, datetime.datetime.now())]
elif sql.startswith('SELECT pg_catalog.pg_read_file'):

View File

@@ -711,7 +711,7 @@ class TestPostgresql(BaseTestPostgresql):
self.assertTrue(self.p.fix_cluster_state())
def test_replica_cached_timeline(self):
self.assertEqual(self.p.replica_cached_timeline(1), 2)
self.assertEqual(self.p.replica_cached_timeline(2), 3)
def test_get_master_timeline(self):
self.assertEqual(self.p.get_master_timeline(), 1)

View File

@@ -85,6 +85,13 @@ class TestRewind(BaseTestPostgresql):
with patch.object(Postgresql, 'is_running', Mock(return_value=True)):
self.r.execute(self.leader)
@patch('patroni.postgresql.rewind.logger.info')
def test__log_master_history(self, mock_logger):
history = [[n, n, ''] for n in range(1, 10)]
self.r._log_master_history(history, 1)
expected = '\n'.join(['{0}\t0/{0}\t'.format(n) for n in range(1, 4)] + ['...', '9\t0/9\t'])
self.assertEqual(mock_logger.call_args[0][1], expected)
@patch.object(Postgresql, 'start', Mock())
@patch.object(Rewind, 'can_rewind', PropertyMock(return_value=True))
@patch.object(Rewind, '_get_local_timeline_lsn', Mock(return_value=(2, '40159C1')))
@@ -101,7 +108,7 @@ class TestRewind(BaseTestPostgresql):
with patch('psycopg2.connect', Mock(side_effect=Exception)):
self.assertFalse(self.r.rewind_or_reinitialize_needed_and_possible(self.leader))
self.r.trigger_check_diverged_lsn()
with patch.object(MockCursor, 'fetchone', Mock(side_effect=[('', 2, '0/0'), ('', b'3\t0/40159C0\tn\n')])):
with patch.object(MockCursor, 'fetchone', Mock(side_effect=[('', 3, '0/0'), ('', b'3\t0/40159C0\tn\n')])):
self.assertFalse(self.r.rewind_or_reinitialize_needed_and_possible(self.leader))
self.r.trigger_check_diverged_lsn()
with patch.object(MockCursor, 'fetchone', Mock(return_value=('', 1, '0/0'))):