mirror of
https://github.com/outbackdingo/patroni.git
synced 2026-01-27 10:20:10 +00:00
Ensure watchdog configuration matches bootstrap.dcs config and log changes (#2480)
Fix an issue where Patroni configured the watchdog with default values when bootstrapping a new cluster, instead of using the configuration that was used to bootstrap the DCS. Also log changes to the watchdog configuration based on the calculated timeout value. Close #2470
This commit is contained in:
@@ -15,7 +15,7 @@ def polling_loop(timeout, interval=1):
|
||||
|
||||
@step('I start {name:w} with watchdog')
|
||||
def start_patroni_with_watchdog(context, name):
|
||||
return context.pctl.start(name, custom_config={'watchdog': True})
|
||||
return context.pctl.start(name, custom_config={'watchdog': True, 'bootstrap': {'dcs': {'ttl': 20}}})
|
||||
|
||||
|
||||
@step('{name:w} watchdog has been pinged after {timeout:d} seconds')
|
||||
@@ -31,6 +31,11 @@ def watchdog_was_closed(context, name):
|
||||
assert context.pctl.get_watchdog(name).was_closed
|
||||
|
||||
|
||||
@step('{name:w} watchdog has a {timeout:d} second timeout')
|
||||
def watchdog_has_timeout(context, name, timeout):
|
||||
assert context.pctl.get_watchdog(name).timeout == timeout
|
||||
|
||||
|
||||
@step('I reset {name:w} watchdog state')
|
||||
def watchdog_reset_pinged(context, name):
|
||||
context.pctl.get_watchdog(name).reset()
|
||||
|
||||
@@ -6,6 +6,14 @@ Feature: watchdog
|
||||
Then postgres0 is a leader after 10 seconds
|
||||
And postgres0 role is the primary after 10 seconds
|
||||
And postgres0 watchdog has been pinged after 10 seconds
|
||||
And postgres0 watchdog has a 15 second timeout
|
||||
|
||||
Scenario: watchdog is reconfigured after global ttl changed
|
||||
Given I run patronictl.py edit-config batman -s ttl=30 --force
|
||||
Then I receive a response returncode 0
|
||||
And I receive a response output "+ttl: 30"
|
||||
When I sleep for 4 seconds
|
||||
Then postgres0 watchdog has a 25 second timeout
|
||||
|
||||
Scenario: watchdog is disabled during pause
|
||||
Given I run patronictl.py pause batman
|
||||
|
||||
@@ -47,6 +47,7 @@ class Patroni(AbstractPatroniDaemon):
|
||||
elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
|
||||
if self.config.set_dynamic_configuration(self.config['bootstrap']['dcs']):
|
||||
self.dcs.reload_config(self.config)
|
||||
self.watchdog.reload_config(self.config)
|
||||
break
|
||||
except DCSError:
|
||||
logger.warning('Can not get cluster from dcs')
|
||||
|
||||
@@ -215,6 +215,10 @@ class Watchdog(object):
|
||||
self._activate()
|
||||
if self.config.timeout != self.active_config.timeout:
|
||||
self.impl.set_timeout(self.config.timeout)
|
||||
if self.is_running:
|
||||
logger.info("{0} updated with {1} second timeout, timing slack {2} seconds"
|
||||
.format(self.impl.describe(), self.impl.get_timeout(), self.config.timing_slack))
|
||||
self.active_config = self.config
|
||||
except WatchdogError as e:
|
||||
logger.error("Error while sending keepalive: %s", e)
|
||||
|
||||
|
||||
@@ -164,6 +164,13 @@ class TestWatchdog(unittest.TestCase):
|
||||
|
||||
watchdog.reload_config({'ttl': 60, 'loop_wait': 15, 'watchdog': {'mode': 'required'}})
|
||||
watchdog.keepalive()
|
||||
self.assertTrue(watchdog.is_running)
|
||||
self.assertEqual(watchdog.config.timeout, 60 - 5)
|
||||
|
||||
watchdog.reload_config({'ttl': 60, 'loop_wait': 15, 'watchdog': {'mode': 'required', 'safety_margin': -1}})
|
||||
watchdog.keepalive()
|
||||
self.assertTrue(watchdog.is_running)
|
||||
self.assertEqual(watchdog.config.timeout, 60 // 2)
|
||||
|
||||
|
||||
class TestNullWatchdog(unittest.TestCase):
|
||||
|
||||
Reference in New Issue
Block a user