Take and apply some parameters from controldata when starting as replica (#703)

* Take and apply some parameters from controldata when starting as replica

https://www.postgresql.org/docs/10/static/hot-standby.html#HOT-STANDBY-ADMIN
There is a set of parameters whose values on the replica must not be smaller than on the primary; otherwise the replica will refuse to start:
* max_connections
* max_prepared_transactions
* max_locks_per_transaction
* max_worker_processes

It might happen that the values of these parameters in the global configuration are not set high enough, which makes it impossible to start a replica without human intervention. Usually this happens when we bootstrap a new cluster from a basebackup.

As a solution to this problem, we take the values of the above parameters from the pg_controldata output and, if the values in the global configuration are not high enough, apply the values taken from pg_controldata and set the `pending_restart` flag.
This commit is contained in:
Alexander Kukushkin
2018-06-12 14:04:32 +02:00
committed by GitHub
parent aa18f70466
commit e939304001
3 changed files with 66 additions and 10 deletions

View File

@@ -612,6 +612,13 @@ class PatroniPoolController(object):
'method': 'pg_basebackup',
'pg_basebackup': {
'command': self.BACKUP_SCRIPT + ' --walmethod=stream --dbname=' + f.backup_source
},
'dcs': {
'postgresql': {
'parameters': {
'max_connections': 101
}
}
}
},
'postgresql': {

View File

@@ -403,7 +403,7 @@ class Postgresql(object):
@property
def sysid(self):
if not self._sysid:
if not self._sysid and not self.bootstrapping:
data = self.controldata()
self._sysid = data.get('Database system identifier', "")
return self._sysid
@@ -818,6 +818,34 @@ class Postgresql(object):
logger.warning("Timed out waiting for PostgreSQL to start")
return False
def _build_effective_configuration(self):
    """Return the server parameters to start postgres with, raised to the controldata values where needed.

    It might happen that the current value of one (or more) of the parameters below, as stored in
    the controldata, is higher than the value stored in the global cluster configuration.

    Example: max_connections in the global configuration is 100, but controldata reports
    `Current max_connections setting: 200`. If we try to start postgres with
    max_connections=100, it will immediately exit.
    As a workaround we start it with the values from controldata and set `pending_restart`
    to true as an indicator that the current values of the parameters do not match expectations.

    :returns: a copy of `self._server_parameters` in which every parameter listed in
        `OPTIONS_MAPPING` is replaced by the controldata value when that value is higher.
        `self._server_parameters` itself is not modified.
    """
    # parameter name in postgresql.conf -> key under which `self.controldata()` reports it
    OPTIONS_MAPPING = {
        'max_connections': 'max_connections setting',
        'max_worker_processes': 'max_worker_processes setting',
        'max_prepared_transactions': 'max_prepared_xacts setting',
        'max_locks_per_transaction': 'max_locks_per_xact setting'
    }

    data = self.controldata()
    effective_configuration = self._server_parameters.copy()

    for name, cname in OPTIONS_MAPPING.items():
        # controldata() yields an empty result when pg_controldata could not be executed,
        # so a field may legitimately be absent: keep the configured value instead of
        # failing the whole start with a KeyError.
        if cname not in data:
            logger.warning('%s is missing from pg_controldata output', cname)
            continue

        value = parse_int(effective_configuration[name])
        cvalue = parse_int(data[cname])
        if cvalue > value:
            effective_configuration[name] = cvalue
            # the running value will differ from the configured one until the next restart
            self._pending_restart = True

    return effective_configuration
def start(self, timeout=None, block_callbacks=False, task=None):
"""Start PostgreSQL
@@ -843,12 +871,13 @@ class Postgresql(object):
self.set_state('starting')
self._pending_restart = False
self._write_postgresql_conf()
configuration = self._server_parameters if self.role == 'master' else self._build_effective_configuration()
self._write_postgresql_conf(configuration)
self.resolve_connection_addresses()
self._replace_pg_hba()
options = ['--{0}={1}'.format(p, self._server_parameters[p]) for p in self.CMDLINE_OPTIONS
if p in self._server_parameters and p != 'wal_keep_segments']
options = ['--{0}={1}'.format(p, configuration[p]) for p in self.CMDLINE_OPTIONS
if p in configuration and p != 'wal_keep_segments']
with self._cancellable_lock:
if self._is_cancelled:
@@ -1053,7 +1082,7 @@ class Postgresql(object):
self.set_state('restart failed ({0})'.format(self.state))
return ret
def _write_postgresql_conf(self):
def _write_postgresql_conf(self, configuration=None):
# rename the original configuration if it is necessary
if 'custom_conf' not in self.config and not os.path.exists(self._postgresql_base_conf):
os.rename(self._postgresql_conf, self._postgresql_base_conf)
@@ -1061,7 +1090,7 @@ class Postgresql(object):
with open(self._postgresql_conf, 'w') as f:
f.write(self._CONFIG_WARNING_HEADER)
f.write("include '{0}'\n\n".format(self.config.get('custom_conf') or self._postgresql_base_conf_name))
for name, value in sorted(self._server_parameters.items()):
for name, value in sorted((configuration or self._server_parameters).items()):
if not self._running_custom_bootstrap or name != 'hba_file':
f.write("{0} = '{1}'\n".format(name, value))
# when we are doing custom bootstrap we assume that we don't know superuser password
@@ -1164,7 +1193,7 @@ class Postgresql(object):
""" return the contents of pg_controldata, or non-True value if pg_controldata call failed """
result = {}
# Don't try to call pg_controldata during backup restore
if not self.bootstrapping and self._version_file_exists() and self.state != 'creating replica':
if self._version_file_exists() and self.state != 'creating replica':
try:
data = subprocess.check_output([self._pgcommand('pg_controldata'), self._data_dir],
env={'LANG': 'C', 'LC_ALL': 'C', 'PATH': os.environ['PATH']})
@@ -1587,9 +1616,12 @@ $$""".format(name, ' '.join(options)), name, password, password)
self.restart()
else:
self._replace_pg_hba()
self.reload()
time.sleep(1) # give a time to postgres to "reload" configuration files
self.close_connection() # close connection to reconnect with a new password
if self.pending_restart:
self.restart()
else:
self.reload()
time.sleep(1) # give a time to postgres to "reload" configuration files
self.close_connection() # close connection to reconnect with a new password
except Exception:
logger.exception('post_bootstrap')
task.complete(False)

View File

@@ -635,6 +635,12 @@ class TestPostgresql(unittest.TestCase):
self.p.post_bootstrap({}, task)
self.assertTrue(task.result)
self.p.bootstrap(config)
with patch.object(Postgresql, 'pending_restart', PropertyMock(return_value=True)), \
patch.object(Postgresql, 'restart', Mock()) as mock_restart:
self.p.post_bootstrap({}, task)
mock_restart.assert_called_once()
self.p.bootstrap(config)
self.p.set_state('stopped')
self.p.reload_config({'authentication': {'superuser': {'username': 'p', 'password': 'p'},
@@ -971,3 +977,14 @@ class TestPostgresql(unittest.TestCase):
self.p.cancel()
type(self.p._cancellable).returncode = PropertyMock(side_effect=[None, -15])
self.p.cancel()
@patch.object(Postgresql, 'get_postgres_role_from_data_directory', Mock(return_value='replica'))
def test__build_effective_configuration(self):
    """Start with a mocked controldata and expect `pending_restart` to be raised.

    `start()` is invoked right after `cancel()`, and the test expects it to return a falsy
    value while still leaving `pending_restart` set.
    """
    # NOTE(review): presumably these values exceed the ones in the test configuration,
    # which is what should trigger `pending_restart` -- confirm against the test fixture.
    controldata = {
        'max_connections setting': '200',
        'max_worker_processes setting': '20',
        'max_prepared_xacts setting': '100',
        'max_locks_per_xact setting': '100',
    }
    mocked_controldata = Mock(return_value=controldata)

    with patch.object(Postgresql, 'controldata', mocked_controldata):
        self.p.cancel()
        self.assertFalse(self.p.start())
        self.assertTrue(self.p.pending_restart)