mirror of
https://github.com/outbackdingo/patroni.git
synced 2026-01-27 10:20:10 +00:00
Reduce chattiness of Patroni logs (#1955)
1. When everything goes normal, only one line will be written for every run of HA loop (see examples): ``` INFO: no action. I am (postgresql0) the leader with the lock INFO: no action. I am a secondary (postgresql1) and following a leader (postgresql0) ``` 2. The `does not have lock` became a debug message. 3. The `Lock owner: postgresql0; I am postgresql1` will be shown only when stream doesn't look normal.
This commit is contained in:
committed by
GitHub
parent
03e71b6717
commit
f403719bb4
@@ -404,7 +404,7 @@ class Ha(object):
|
||||
self.state_handler.set_role('replica')
|
||||
|
||||
if not node_to_follow:
|
||||
return 'no action'
|
||||
return 'no action. I am ({0})'.format(self.state_handler.name)
|
||||
elif is_leader:
|
||||
self.demote('immediate-nolock')
|
||||
return demote_reason
|
||||
@@ -992,13 +992,13 @@ class Ha(object):
|
||||
# in case of standby cluster we don't really need to
|
||||
# enforce anything, since the leader is not a master.
|
||||
# So just remind the role.
|
||||
msg = 'no action. i am the standby leader with the lock' \
|
||||
msg = 'no action. I am ({0}) the standby leader with the lock'.format(self.state_handler.name) \
|
||||
if self.state_handler.role == 'standby_leader' else \
|
||||
'promoted self to a standby leader because i had the session lock'
|
||||
return self.enforce_follow_remote_master(msg)
|
||||
else:
|
||||
return self.enforce_master_role(
|
||||
'no action. i am the leader with the lock',
|
||||
'no action. I am ({0}) the leader with the lock'.format(self.state_handler.name),
|
||||
'promoted self to leader because i had the session lock'
|
||||
)
|
||||
else:
|
||||
@@ -1012,12 +1012,15 @@ class Ha(object):
|
||||
else:
|
||||
return 'not promoting because failed to update leader lock in DCS'
|
||||
else:
|
||||
logger.info('does not have lock')
|
||||
logger.debug('does not have lock')
|
||||
lock_owner = self.cluster.leader and self.cluster.leader.name
|
||||
if self.is_standby_cluster():
|
||||
return self.follow('cannot be a real master in standby cluster',
|
||||
'no action. i am a secondary and i am following a standby leader', refresh=False)
|
||||
return self.follow('demoting self because i do not have the lock and i was a leader',
|
||||
'no action. i am a secondary and i am following a leader', refresh=False)
|
||||
return self.follow('cannot be a real primary in a standby cluster',
|
||||
'no action. I am a secondary ({0}) and following a standby leader ({1})'.format(
|
||||
self.state_handler.name, lock_owner), refresh=False)
|
||||
return self.follow('demoting self because I do not have the lock and I was a leader',
|
||||
'no action. I am a secondary ({0}) and following a leader ({1})'.format(
|
||||
self.state_handler.name, lock_owner), refresh=False)
|
||||
|
||||
def evaluate_scheduled_restart(self):
|
||||
if self._async_executor.busy: # Restart already in progress
|
||||
|
||||
@@ -166,6 +166,8 @@ class PatroniLogger(Thread):
|
||||
self._root_logger.addHandler(self._queue_handler)
|
||||
self._root_logger.removeHandler(self._proxy_handler)
|
||||
|
||||
prev_record = None
|
||||
|
||||
while True:
|
||||
self._close_old_handlers()
|
||||
|
||||
@@ -173,7 +175,18 @@ class PatroniLogger(Thread):
|
||||
if record is None:
|
||||
break
|
||||
|
||||
self.log_handler.handle(record)
|
||||
if self._root_logger.level == logging.INFO:
|
||||
if record.msg.startswith('Lock owner: '):
|
||||
prev_record, record = record, None
|
||||
else:
|
||||
if prev_record and prev_record.thread == record.thread:
|
||||
if not (record.msg.startswith('no action. ') or record.msg.startswith('PAUSE: no action')):
|
||||
self.log_handler.handle(prev_record)
|
||||
prev_record = None
|
||||
|
||||
if record:
|
||||
self.log_handler.handle(record)
|
||||
|
||||
self._queue_handler.queue.task_done()
|
||||
|
||||
def shutdown(self):
|
||||
|
||||
@@ -329,7 +329,7 @@ class TestHa(PostgresInit):
|
||||
self.ha.has_lock = true
|
||||
self.p.is_leader = false
|
||||
self.p.set_role('master')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am the leader with the lock')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am (postgresql0) the leader with the lock')
|
||||
|
||||
def test_demote_after_failing_to_obtain_lock(self):
|
||||
self.ha.acquire_lock = false
|
||||
@@ -369,12 +369,12 @@ class TestHa(PostgresInit):
|
||||
self.ha.cluster = get_cluster_initialized_with_leader()
|
||||
self.ha.cluster.is_unlocked = false
|
||||
self.ha.has_lock = true
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am the leader with the lock')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am (postgresql0) the leader with the lock')
|
||||
|
||||
def test_demote_because_not_having_lock(self):
|
||||
self.ha.cluster.is_unlocked = false
|
||||
with patch.object(Watchdog, 'is_running', PropertyMock(return_value=True)):
|
||||
self.assertEqual(self.ha.run_cycle(), 'demoting self because i do not have the lock and i was a leader')
|
||||
self.assertEqual(self.ha.run_cycle(), 'demoting self because I do not have the lock and I was a leader')
|
||||
|
||||
def test_demote_because_update_lock_failed(self):
|
||||
self.ha.cluster.is_unlocked = false
|
||||
@@ -388,16 +388,16 @@ class TestHa(PostgresInit):
|
||||
def test_follow(self):
|
||||
self.ha.cluster.is_unlocked = false
|
||||
self.p.is_leader = false
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am a secondary and i am following a leader')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am a secondary (postgresql0) and following a leader ()')
|
||||
self.ha.patroni.replicatefrom = "foo"
|
||||
self.p.config.check_recovery_conf = Mock(return_value=(True, False))
|
||||
self.ha.cluster.config.data.update({'slots': {'l': {'database': 'a', 'plugin': 'b'}}})
|
||||
self.ha.cluster.members[1].data['tags']['replicatefrom'] = 'postgresql0'
|
||||
self.ha.patroni.nofailover = True
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am a secondary and i am following a leader')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am a secondary (postgresql0) and following a leader ()')
|
||||
del self.ha.cluster.config.data['slots']
|
||||
self.ha.cluster.config.data.update({'postgresql': {'use_slots': False}})
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am a secondary and i am following a leader')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am a secondary (postgresql0) and following a leader ()')
|
||||
del self.ha.cluster.config.data['postgresql']['use_slots']
|
||||
|
||||
def test_follow_in_pause(self):
|
||||
@@ -405,7 +405,7 @@ class TestHa(PostgresInit):
|
||||
self.ha.is_paused = true
|
||||
self.assertEqual(self.ha.run_cycle(), 'PAUSE: continue to run as master without lock')
|
||||
self.p.is_leader = false
|
||||
self.assertEqual(self.ha.run_cycle(), 'PAUSE: no action')
|
||||
self.assertEqual(self.ha.run_cycle(), 'PAUSE: no action. I am (postgresql0)')
|
||||
|
||||
@patch.object(Rewind, 'rewind_or_reinitialize_needed_and_possible', Mock(return_value=True))
|
||||
@patch.object(Rewind, 'can_rewind', PropertyMock(return_value=True))
|
||||
@@ -516,27 +516,27 @@ class TestHa(PostgresInit):
|
||||
self.ha.fetch_node_status = get_node_status()
|
||||
self.ha.has_lock = true
|
||||
self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', '', None))
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am the leader with the lock')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am (postgresql0) the leader with the lock')
|
||||
self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, '', self.p.name, None))
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am the leader with the lock')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am (postgresql0) the leader with the lock')
|
||||
self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, '', 'blabla', None))
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am the leader with the lock')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am (postgresql0) the leader with the lock')
|
||||
f = Failover(0, self.p.name, '', None)
|
||||
self.ha.cluster = get_cluster_initialized_with_leader(f)
|
||||
self.assertEqual(self.ha.run_cycle(), 'manual failover: demoting myself')
|
||||
self.ha._rewind.rewind_or_reinitialize_needed_and_possible = true
|
||||
self.assertEqual(self.ha.run_cycle(), 'manual failover: demoting myself')
|
||||
self.ha.fetch_node_status = get_node_status(nofailover=True)
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am the leader with the lock')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am (postgresql0) the leader with the lock')
|
||||
self.ha.fetch_node_status = get_node_status(watchdog_failed=True)
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am the leader with the lock')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am (postgresql0) the leader with the lock')
|
||||
self.ha.fetch_node_status = get_node_status(timeline=1)
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am the leader with the lock')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am (postgresql0) the leader with the lock')
|
||||
self.ha.fetch_node_status = get_node_status(wal_position=1)
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am the leader with the lock')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am (postgresql0) the leader with the lock')
|
||||
# manual failover from the previous leader to us won't happen if we hold the nofailover flag
|
||||
self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, None))
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am the leader with the lock')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am (postgresql0) the leader with the lock')
|
||||
|
||||
# Failover scheduled time must include timezone
|
||||
scheduled = datetime.datetime.now()
|
||||
@@ -545,28 +545,28 @@ class TestHa(PostgresInit):
|
||||
|
||||
scheduled = datetime.datetime.utcnow().replace(tzinfo=tzutc)
|
||||
self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
|
||||
self.assertEqual('no action. i am the leader with the lock', self.ha.run_cycle())
|
||||
self.assertEqual('no action. I am (postgresql0) the leader with the lock', self.ha.run_cycle())
|
||||
|
||||
scheduled = scheduled + datetime.timedelta(seconds=30)
|
||||
self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
|
||||
self.assertEqual('no action. i am the leader with the lock', self.ha.run_cycle())
|
||||
self.assertEqual('no action. I am (postgresql0) the leader with the lock', self.ha.run_cycle())
|
||||
|
||||
scheduled = scheduled + datetime.timedelta(seconds=-600)
|
||||
self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
|
||||
self.assertEqual('no action. i am the leader with the lock', self.ha.run_cycle())
|
||||
self.assertEqual('no action. I am (postgresql0) the leader with the lock', self.ha.run_cycle())
|
||||
|
||||
scheduled = None
|
||||
self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
|
||||
self.assertEqual('no action. i am the leader with the lock', self.ha.run_cycle())
|
||||
self.assertEqual('no action. I am (postgresql0) the leader with the lock', self.ha.run_cycle())
|
||||
|
||||
def test_manual_failover_from_leader_in_pause(self):
|
||||
self.ha.has_lock = true
|
||||
self.ha.is_paused = true
|
||||
scheduled = datetime.datetime.now()
|
||||
self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
|
||||
self.assertEqual('PAUSE: no action. i am the leader with the lock', self.ha.run_cycle())
|
||||
self.assertEqual('PAUSE: no action. I am (postgresql0) the leader with the lock', self.ha.run_cycle())
|
||||
self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, self.p.name, '', None))
|
||||
self.assertEqual('PAUSE: no action. i am the leader with the lock', self.ha.run_cycle())
|
||||
self.assertEqual('PAUSE: no action. I am (postgresql0) the leader with the lock', self.ha.run_cycle())
|
||||
|
||||
def test_manual_failover_from_leader_in_synchronous_mode(self):
|
||||
self.p.is_leader = true
|
||||
@@ -575,7 +575,7 @@ class TestHa(PostgresInit):
|
||||
self.ha.is_failover_possible = false
|
||||
self.ha.process_sync_replication = Mock()
|
||||
self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, self.p.name, 'a', None), (self.p.name, None))
|
||||
self.assertEqual('no action. i am the leader with the lock', self.ha.run_cycle())
|
||||
self.assertEqual('no action. I am (postgresql0) the leader with the lock', self.ha.run_cycle())
|
||||
self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, self.p.name, 'a', None), (self.p.name, 'a'))
|
||||
self.ha.is_failover_possible = true
|
||||
self.assertEqual('manual failover: demoting myself', self.ha.run_cycle())
|
||||
@@ -736,7 +736,7 @@ class TestHa(PostgresInit):
|
||||
self.p.config.check_recovery_conf = Mock(return_value=(False, False))
|
||||
self.ha._leader_timeline = 1
|
||||
self.assertEqual(self.ha.run_cycle(), 'promoted self to a standby leader because i had the session lock')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am the standby leader with the lock')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am (leader) the standby leader with the lock')
|
||||
self.p.set_role('replica')
|
||||
self.p.config.check_recovery_conf = Mock(return_value=(True, False))
|
||||
self.assertEqual(self.ha.run_cycle(), 'promoted self to a standby leader because i had the session lock')
|
||||
@@ -745,7 +745,8 @@ class TestHa(PostgresInit):
|
||||
self.p.is_leader = false
|
||||
self.p.name = 'replica'
|
||||
self.ha.cluster = get_standby_cluster_initialized_with_only_leader()
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am a secondary and i am following a standby leader')
|
||||
self.assertEqual(self.ha.run_cycle(),
|
||||
'no action. I am a secondary (replica) and following a standby leader (leader)')
|
||||
with patch.object(Leader, 'conn_url', PropertyMock(return_value='')):
|
||||
self.assertEqual(self.ha.run_cycle(), 'continue following the old known standby leader')
|
||||
|
||||
@@ -838,7 +839,8 @@ class TestHa(PostgresInit):
|
||||
|
||||
self.ha.has_lock = false
|
||||
self.p.is_leader = false
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am a secondary and i am following a leader')
|
||||
self.assertEqual(self.ha.run_cycle(),
|
||||
'no action. I am a secondary (postgresql0) and following a leader (leader)')
|
||||
check_calls([(update_lock, False), (demote, False)])
|
||||
|
||||
def test_manual_failover_while_starting(self):
|
||||
@@ -1063,7 +1065,7 @@ class TestHa(PostgresInit):
|
||||
self.ha.cluster.config.data.clear()
|
||||
self.ha.has_lock = true
|
||||
self.ha.cluster.is_unlocked = false
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am the leader with the lock')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am (postgresql0) the leader with the lock')
|
||||
|
||||
def test_watch(self):
|
||||
self.ha.cluster = get_cluster_initialized_with_leader()
|
||||
@@ -1098,7 +1100,7 @@ class TestHa(PostgresInit):
|
||||
self.ha.cluster.is_unlocked = false
|
||||
for tl in (1, 3):
|
||||
self.p.get_master_timeline = Mock(return_value=tl)
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am the leader with the lock')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am (postgresql0) the leader with the lock')
|
||||
|
||||
@patch('sys.exit', return_value=1)
|
||||
def test_abort_join(self, exit_mock):
|
||||
@@ -1111,9 +1113,9 @@ class TestHa(PostgresInit):
|
||||
self.ha.has_lock = true
|
||||
self.ha.cluster.is_unlocked = false
|
||||
self.ha.is_paused = true
|
||||
self.assertEqual(self.ha.run_cycle(), 'PAUSE: no action. i am the leader with the lock')
|
||||
self.assertEqual(self.ha.run_cycle(), 'PAUSE: no action. I am (postgresql0) the leader with the lock')
|
||||
self.ha.is_paused = false
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am the leader with the lock')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am (postgresql0) the leader with the lock')
|
||||
|
||||
@patch('psycopg2.connect', psycopg2_connect)
|
||||
def test_permanent_logical_slots_after_promote(self):
|
||||
@@ -1123,7 +1125,7 @@ class TestHa(PostgresInit):
|
||||
self.assertEqual(self.ha.run_cycle(), 'acquired session lock as a leader')
|
||||
self.ha.cluster = get_cluster_initialized_without_leader(leader=True, cluster_config=config)
|
||||
self.ha.has_lock = true
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. i am the leader with the lock')
|
||||
self.assertEqual(self.ha.run_cycle(), 'no action. I am (other) the leader with the lock')
|
||||
|
||||
@patch.object(Cluster, 'has_member', true)
|
||||
def test_run_cycle(self):
|
||||
|
||||
@@ -63,3 +63,12 @@ class TestPatroniLogger(unittest.TestCase):
|
||||
self.assertRaises(Exception, logger.shutdown)
|
||||
self.assertLessEqual(logger.queue_size, 2) # "Failed to close the old log handler" could be still in the queue
|
||||
self.assertEqual(logger.records_lost, 0)
|
||||
|
||||
def test_interceptor(self):
|
||||
logger = PatroniLogger()
|
||||
logger.reload_config({'level': 'INFO'})
|
||||
logger.start()
|
||||
_LOG.info('Lock owner: ')
|
||||
_LOG.info('blabla')
|
||||
logger.shutdown()
|
||||
self.assertEqual(logger.records_lost, 0)
|
||||
|
||||
Reference in New Issue
Block a user