diff --git a/features/basic_replication.feature b/features/basic_replication.feature index e0eafc2a..ba3fc926 100644 --- a/features/basic_replication.feature +++ b/features/basic_replication.feature @@ -4,6 +4,7 @@ Feature: basic replication Scenario: check replication of a single table Given I start postgres0 Then postgres0 is a leader after 10 seconds + And there is a non empty initialize key in DCS after 15 seconds When I issue a PATCH request to http://127.0.0.1:8008/config with {"ttl": 20, "loop_wait": 2, "synchronous_mode": true} Then I receive a response code 200 When I start postgres1 @@ -35,10 +36,12 @@ Feature: basic replication And I run patronictl.py resume batman Then I receive a response returncode 0 And postgres2 role is the primary after 24 seconds + And Response on GET http://127.0.0.1:8010/history contains recovery after 10 seconds When I issue a PATCH request to http://127.0.0.1:8010/config with {"synchronous_mode": null, "master_start_timeout": 0} Then I receive a response code 200 When I add the table bar to postgres2 Then table bar is present on postgres1 after 20 seconds + And Response on GET http://127.0.0.1:8010/config contains master_start_timeout after 10 seconds Scenario: check immediate failover when master_start_timeout=0 Given I kill postmaster on postgres2 diff --git a/features/custom_bootstrap.feature b/features/custom_bootstrap.feature index 89ec52c7..2c8671f7 100644 --- a/features/custom_bootstrap.feature +++ b/features/custom_bootstrap.feature @@ -13,5 +13,5 @@ Scenario: make a backup and do a restore into a new cluster Given I add the table bar to postgres1 And I do a backup of postgres1 When I start postgres2 in a cluster batman2 from backup - Then postgres2 is a leader of batman2 after 10 seconds + Then postgres2 is a leader of batman2 after 30 seconds And table bar is present on postgres2 after 10 seconds diff --git a/features/environment.py b/features/environment.py index deb8466c..d7ba4e82 100644 --- 
a/features/environment.py +++ b/features/environment.py @@ -454,8 +454,9 @@ class KubernetesController(AbstractDcsController): return (pod.metadata.annotations or {}).get('status', '') else: try: - e = self._api.read_namespaced_endpoints(scope + ('' if key == 'leader' else '-' + key), self._namespace) - if key == 'leader': + ep = scope + {'leader': '', 'history': '-config', 'initialize': '-config'}.get(key, '-' + key) + e = self._api.read_namespaced_endpoints(ep, self._namespace) + if key != 'sync': return e.metadata.annotations[key] else: return json.dumps(e.metadata.annotations) diff --git a/features/patroni_api.feature b/features/patroni_api.feature index e8b9eb5d..d6d53a17 100644 --- a/features/patroni_api.feature +++ b/features/patroni_api.feature @@ -43,6 +43,20 @@ Scenario: check dynamic configuration change via DCS When I issue a GET request to http://127.0.0.1:8008/patroni Then I receive a response code 200 And I receive a response tags {'new_tag': 'new_value'} + And I sleep for 4 seconds + +Scenario: check the scheduled restart + Given I issue a PATCH request to http://127.0.0.1:8008/config with {"postgresql": {"parameters": {"superuser_reserved_connections": "6"}}} + Then I receive a response code 200 + And Response on GET http://127.0.0.1:8008/patroni contains pending_restart after 5 seconds + Given I issue a scheduled restart at http://127.0.0.1:8008 in 3 seconds with {"role": "replica"} + Then I receive a response code 202 + And I sleep for 4 seconds + And Response on GET http://127.0.0.1:8008/patroni contains pending_restart after 10 seconds + Given I issue a scheduled restart at http://127.0.0.1:8008 in 3 seconds with {"restart_pending": "True"} + Then I receive a response code 202 + And Response on GET http://127.0.0.1:8008/patroni does not contain pending_restart after 10 seconds + And postgres0 role is the primary after 10 seconds Scenario: check API requests for the primary-replica pair in the pause mode Given I run patronictl.py pause batman @@ 
-101,16 +115,3 @@ Scenario: check the scheduled switchover Then I receive a response code 503 When I issue a GET request to http://127.0.0.1:8009/replica Then I receive a response code 200 - -Scenario: check the scheduled restart - Given I issue a PATCH request to http://127.0.0.1:8008/config with {"postgresql": {"parameters": {"superuser_reserved_connections": "6"}}} - Then I receive a response code 200 - And Response on GET http://127.0.0.1:8008/patroni contains pending_restart after 5 seconds - Given I issue a scheduled restart at http://127.0.0.1:8008 in 3 seconds with {"role": "replica"} - Then I receive a response code 202 - And I sleep for 4 seconds - And Response on GET http://127.0.0.1:8008/patroni contains pending_restart after 10 seconds - Given I issue a scheduled restart at http://127.0.0.1:8008 in 3 seconds with {"restart_pending": "True"} - Then I receive a response code 202 - And Response on GET http://127.0.0.1:8008/patroni does not contain pending_restart after 10 seconds - diff --git a/features/standby_cluster.feature b/features/standby_cluster.feature index bab07b98..9ba3dd06 100644 --- a/features/standby_cluster.feature +++ b/features/standby_cluster.feature @@ -2,7 +2,7 @@ Feature: standby cluster Scenario: check permanent logical slots are preserved on failover/switchover Given I start postgres1 Then postgres1 is a leader after 10 seconds - And I sleep for 3 seconds + And there is a non empty initialize key in DCS after 15 seconds When I issue a PATCH request to http://127.0.0.1:8009/config with {"loop_wait": 2, "slots": {"pm_1": {"type": "physical"}}, "postgresql": {"parameters": {"wal_level": "logical"}}} Then I receive a response code 200 And Response on GET http://127.0.0.1:8009/config contains slots after 10 seconds diff --git a/features/steps/cascading_replication.py b/features/steps/cascading_replication.py index 35fb585b..f8ca5708 100644 --- a/features/steps/cascading_replication.py +++ b/features/steps/cascading_replication.py @@ 
-34,3 +34,30 @@ def check_member(context, name, key, value, time_limit):
             pass
         time.sleep(1)
     assert False, "{0} does not have {1}={2} in dcs after {3} seconds".format(name, key, value, time_limit)
+
+
+@step('there is a non empty {key:w} key in DCS after {time_limit:d} seconds')
+def check_initialize(context, key, time_limit):
+    """Wait until the DCS returns a non-empty value for ``key``.
+
+    Calls ``context.dcs_ctl.query(key)`` once per second and returns as soon
+    as the result is truthy.
+
+    :param context: step context; must provide ``timeout_multiplier`` and ``dcs_ctl``.
+    :param key: name of the DCS key to query (a single word, per the step pattern).
+    :param time_limit: seconds to wait; scaled by ``context.timeout_multiplier``.
+    :raises AssertionError: if no truthy value was returned before the deadline.
+    """
+    time_limit *= context.timeout_multiplier
+    max_time = time.time() + int(time_limit)
+    while time.time() < max_time:
+        try:
+            if context.dcs_ctl.query(key):
+                return
+        except Exception:
+            # Best effort: a failed query is handled like an empty key and retried.
+            pass
+        time.sleep(1)
+    # Raise explicitly rather than ``assert False`` so that the failure is not
+    # stripped when Python runs with -O.
+    raise AssertionError("There is no {0} in dcs after {1} seconds".format(key, time_limit))