Make sure data directory is empty before trying to restore backup (#307)

We make a number of attempts when trying to initialize a replica using
different methods. Any of these attempts may create and put something into
the data directory, which causes the next attempts to fail.
In addition to that, improve logging when creating a replica.
This commit is contained in:
Alexander Kukushkin
2016-09-19 13:32:27 +02:00
committed by GitHub
parent 540ee2b3c7
commit 5c8399e4fa
2 changed files with 24 additions and 8 deletions

View File

@@ -450,6 +450,9 @@ class Postgresql(object):
# if basebackup succeeds, exit with success
break
else:
if not self.data_directory_empty():
self.remove_data_directory()
cmd = replica_method
method_config = {}
# user-defined method; check for configuration
@@ -459,19 +462,23 @@ class Postgresql(object):
# look to see if the user has supplied a full command path
# if not, use the method name as the command
cmd = method_config.pop('command', cmd)
# add the default parameters
# add the default parameters
method_config.update({"scope": self.scope,
"role": "replica",
"datadir": self._data_dir,
"connstring": connstring})
params = ["--{0}={1}".format(arg, val) for arg, val in method_config.items()]
try:
method_config.update({"scope": self.scope,
"role": "replica",
"datadir": self._data_dir,
"connstring": connstring})
params = ["--{0}={1}".format(arg, val) for arg, val in method_config.items()]
# call script with the full set of parameters
ret = subprocess.call(shlex.split(cmd) + params, env=env)
# if we succeeded, stop
if ret == 0:
logger.info('replica has been created using %s', replica_method)
break
else:
logger.error('Error creating replica using method %s: %s exited with code=%s',
replica_method, cmd, ret)
except Exception:
logger.exception('Error creating replica using method %s', replica_method)
ret = 1
@@ -1014,17 +1021,22 @@ $$""".format(name, ' '.join(options)), name, password, password)
maxfailures = 2
ret = 1
for bbfailures in range(0, maxfailures):
if not self.data_directory_empty():
self.remove_data_directory()
try:
ret = subprocess.call([self._pgcommand('pg_basebackup'), '--pgdata=' + self._data_dir,
'--xlog-method=stream', "--dbname=" + conn_url], env=env)
if ret == 0:
break
else:
logger.error('Error when fetching backup: pg_basebackup exited with code=%s', ret)
except Exception as e:
logger.error('Error when fetching backup with pg_basebackup: {0}'.format(e))
logger.error('Error when fetching backup with pg_basebackup: %s', e)
if bbfailures < maxfailures - 1:
logger.error('Trying again in 5 seconds')
logger.warning('Trying again in 5 seconds')
time.sleep(5)
return ret

View File

@@ -290,6 +290,7 @@ class TestPostgresql(unittest.TestCase):
self.assertFalse(self.p.can_rewind)
@patch('time.sleep', Mock())
@patch.object(Postgresql, 'remove_data_directory', Mock(return_value=True))
def test_create_replica(self):
self.p.delete_trigger_file = Mock(side_effect=OSError)
with patch('subprocess.call', Mock(side_effect=[1, 0])):
@@ -307,6 +308,9 @@ class TestPostgresql(unittest.TestCase):
with patch('subprocess.call', Mock(side_effect=Exception("foo"))):
self.assertEquals(self.p.create_replica(self.leader), 1)
with patch('subprocess.call', Mock(return_value=1)):
self.assertEquals(self.p.create_replica(self.leader), 1)
@patch.object(Postgresql, 'is_running', Mock(return_value=True))
def test_sync_replication_slots(self):
self.p.start()