diff --git a/docs/rest_api.rst b/docs/rest_api.rst index f0f9b6ab..939fb745 100644 --- a/docs/rest_api.rst +++ b/docs/rest_api.rst @@ -29,6 +29,36 @@ For all health check ``GET`` requests Patroni returns a JSON document with the s - ``GET /health``: returns HTTP status code **200** only when PostgreSQL is up and running. +- ``GET /liveness``: always returns HTTP status code **200** what only indicates that Patroni is running. Could be used for ``livenessProbe``. + +- ``GET /readiness``: returns HTTP status code **200** when the Patroni node is running as the leader or when PostgreSQL is up and running. The endpoint could be used for ``readinessProbe`` when it is not possible to use Kubenetes endpoints for leader elections (OpenShift). + +Both, ``readiness`` and ``liveness`` endpoints are very light-weight and not executing any SQL. Probes should be configured in such a way that they start failing about time when the leader key is expiring. With the default value of ``ttl``, which is ``30s`` example probes would look like: + +.. code-block:: yaml + + readinessProbe: + httpGet: + scheme: HTTP + path: /readiness + port: 8008 + initialDelaySeconds: 3 + periodSeconds: 10 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + livenessProbe: + httpGet: + scheme: HTTP + path: /liveness + port: 8008 + initialDelaySeconds: 3 + periodSeconds: 10 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + + Monitoring endpoint ------------------- diff --git a/kubernetes/entrypoint.sh b/kubernetes/entrypoint.sh index 1803301e..b4fa58be 100755 --- a/kubernetes/entrypoint.sh +++ b/kubernetes/entrypoint.sh @@ -33,7 +33,5 @@ postgresql: __EOF__ unset PATRONI_SUPERUSER_PASSWORD PATRONI_REPLICATION_PASSWORD -export KUBERNETES_NAMESPACE=$PATRONI_KUBERNETES_NAMESPACE -export POD_NAME=$PATRONI_NAME -exec /usr/bin/python3 /usr/local/bin/patroni /home/postgres/patroni.yml \ No newline at end of file +exec /usr/bin/python3 /usr/local/bin/patroni /home/postgres/patroni.yml diff --git a/kubernetes/openshift-example/templates/template_patroni_ephemeral.yml b/kubernetes/openshift-example/templates/template_patroni_ephemeral.yml index 12a252d8..98c66c6d 100644 --- a/kubernetes/openshift-example/templates/template_patroni_ephemeral.yml +++ b/kubernetes/openshift-example/templates/template_patroni_ephemeral.yml @@ -152,6 +152,16 @@ objects: image: docker-registry.default.svc:5000/${NAMESPACE}/patroni:latest imagePullPolicy: IfNotPresent name: ${APPLICATION_NAME} + readinessProbe: + httpGet: + scheme: HTTP + path: /readiness + port: 8008 + initialDelaySeconds: 3 + periodSeconds: 10 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 ports: - containerPort: 8008 protocol: TCP diff --git a/kubernetes/openshift-example/templates/template_patroni_persistent.yaml b/kubernetes/openshift-example/templates/template_patroni_persistent.yaml index 8dbb1acb..5a2a61d2 100644 --- a/kubernetes/openshift-example/templates/template_patroni_persistent.yaml +++ b/kubernetes/openshift-example/templates/template_patroni_persistent.yaml @@ -5,11 +5,9 @@ metadata: annotations: description: |- Patroni Postgresql database cluster, with persistent storage. - - WARNING: Any data stored will be lost upon pod destruction. Only use this template for testing. iconClass: icon-postgresql openshift.io/display-name: Patroni Postgresql (Persistent) - openshift.io/long-description: This template deploys a a patroni postgresql HA cluster without persistent storage. + openshift.io/long-description: This template deploys a a patroni postgresql HA cluster with persistent storage. tags: postgresql objects: - apiVersion: v1 @@ -166,6 +164,16 @@ objects: image: docker-registry.default.svc:5000/${NAMESPACE}/patroni:latest imagePullPolicy: IfNotPresent name: ${APPLICATION_NAME} + readinessProbe: + httpGet: + scheme: HTTP + path: /readiness + port: 8008 + initialDelaySeconds: 3 + periodSeconds: 10 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 ports: - containerPort: 8008 protocol: TCP @@ -314,4 +322,4 @@ parameters: - description: The size of the persistent volume to create. displayName: Persistent Volume Size name: PVC_SIZE - value: 5Gi \ No newline at end of file + value: 5Gi diff --git a/kubernetes/patroni_k8s.yaml b/kubernetes/patroni_k8s.yaml index 91598819..c99b25b5 100644 --- a/kubernetes/patroni_k8s.yaml +++ b/kubernetes/patroni_k8s.yaml @@ -10,7 +10,7 @@ spec: clusterIP: None --- -apiVersion: apps/v1beta1 +apiVersion: apps/v1 kind: StatefulSet metadata: name: &cluster_name patronidemo @@ -31,6 +31,16 @@ spec: - name: *cluster_name image: patroni # docker build -t patroni . imagePullPolicy: IfNotPresent + readinessProbe: + httpGet: + scheme: HTTP + path: /readiness + port: 8008 + initialDelaySeconds: 3 + periodSeconds: 10 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 ports: - containerPort: 8008 protocol: TCP @@ -123,6 +133,25 @@ spec: - port: 5432 targetPort: 5432 +--- +apiVersion: v1 +kind: Service +metadata: + name: patronidemo-repl + labels: + application: patroni + cluster-name: &cluster_name patronidemo + role: replica +spec: + type: ClusterIP + selector: + application: patroni + cluster-name: *cluster_name + role: replica + ports: + - port: 5432 + targetPort: 5432 + --- apiVersion: v1 kind: Secret diff --git a/patroni/api.py b/patroni/api.py index 3d5cf7cc..062b8827 100644 --- a/patroni/api.py +++ b/patroni/api.py @@ -24,6 +24,11 @@ logger = logging.getLogger(__name__) class RestApiHandler(BaseHTTPRequestHandler): + def _write_status_code_only(self, status_code): + message = self.responses[status_code][0] + self.wfile.write('{0} {1} {2}\r\n\r\n'.format(self.protocol_version, status_code, message).encode('utf-8')) + self.log_request(status_code) + def _write_response(self, status_code, body, content_type='text/html', headers=None): self.send_response(status_code) headers = headers or {} @@ -81,9 +86,6 @@ class RestApiHandler(BaseHTTPRequestHandler): def do_GET(self, write_status_code_only=False): """Default method for processing all GET requests which can not be routed to other methods""" - time_start = time.time() - request_type = 'OPTIONS' if write_status_code_only else 'GET' - path = '/master' if self.path == '/' else self.path response = self.get_postgresql_status() @@ -127,18 +129,26 @@ class RestApiHandler(BaseHTTPRequestHandler): status_code = replica_status_code if write_status_code_only: # when haproxy sends OPTIONS request it reads only status code and nothing more - message = self.responses[status_code][0] - self.wfile.write('{0} {1} {2}\r\n\r\n'.format(self.protocol_version, status_code, message).encode('utf-8')) + self._write_status_code_only(status_code) else: self._write_status_response(status_code, response) - time_end = time.time() - self.log_message('%s %s %s latency: %s ms', request_type, path, - status_code, (time_end - time_start) * 1000) - def do_OPTIONS(self): self.do_GET(write_status_code_only=True) + def do_GET_liveness(self): + self._write_status_code_only(200) + + def do_GET_readiness(self): + patroni = self.server.patroni + if patroni.ha.is_leader(): + status_code = 200 + elif patroni.postgresql.state == 'running': + status_code = 200 if patroni.dcs.cluster else 503 + else: + status_code = 503 + self._write_status_code_only(status_code) + def do_GET_patroni(self): response = self.get_postgresql_status(True) self._write_status_response(200, response) @@ -492,8 +502,13 @@ class RestApiHandler(BaseHTTPRequestHandler): state = 'unknown' return {'state': state, 'role': postgresql.role} + def handle_one_request(self): + self.__start_time = time.time() + BaseHTTPRequestHandler.handle_one_request(self) + def log_message(self, fmt, *args): - logger.debug("API thread: %s - - [%s] %s", self.client_address[0], self.log_date_time_string(), fmt % args) + latency = 1000.0 * (time.time() - self.__start_time) + logger.debug("API thread: %s - - %s latency: %0.3f ms", self.client_address[0], fmt % args, latency) class RestApiServer(ThreadingMixIn, HTTPServer, Thread): diff --git a/tests/test_api.py b/tests/test_api.py index 9d6b18d9..62da9bb9 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -177,10 +177,10 @@ class TestRestApiHandler(unittest.TestCase): with patch.object(RestApiHandler, 'get_postgresql_status', Mock(return_value={'role': 'replica'})): MockPatroni.dcs.cluster.sync.sync_standby = '' MockRestApiServer(RestApiHandler, 'GET /asynchronous') - MockPatroni.ha.is_leader = Mock(return_value=True) - MockRestApiServer(RestApiHandler, 'GET /replica') - with patch.object(MockHa, 'is_standby_cluster', Mock(return_value=True)): - MockRestApiServer(RestApiHandler, 'GET /standby_leader') + with patch.object(MockHa, 'is_leader', Mock(return_value=True)): + MockRestApiServer(RestApiHandler, 'GET /replica') + with patch.object(MockHa, 'is_standby_cluster', Mock(return_value=True)): + MockRestApiServer(RestApiHandler, 'GET /standby_leader') MockPatroni.dcs.cluster = None with patch.object(RestApiHandler, 'get_postgresql_status', Mock(return_value={'role': 'master'})): MockRestApiServer(RestApiHandler, 'GET /master') @@ -195,6 +195,16 @@ class TestRestApiHandler(unittest.TestCase): def test_do_OPTIONS(self): self.assertIsNotNone(MockRestApiServer(RestApiHandler, 'OPTIONS / HTTP/1.0')) + def test_do_GET_liveness(self): + self.assertIsNotNone(MockRestApiServer(RestApiHandler, 'GET /liveness HTTP/1.0')) + + def test_do_GET_readiness(self): + self.assertIsNotNone(MockRestApiServer(RestApiHandler, 'GET /readiness HTTP/1.0')) + with patch.object(MockHa, 'is_leader', Mock(return_value=True)): + self.assertIsNotNone(MockRestApiServer(RestApiHandler, 'GET /readiness HTTP/1.0')) + with patch.object(MockPostgresql, 'state', PropertyMock(return_value='stopped')): + self.assertIsNotNone(MockRestApiServer(RestApiHandler, 'GET /readiness HTTP/1.0')) + @patch.object(MockPostgresql, 'state', PropertyMock(return_value='stopped')) def test_do_GET_patroni(self): self.assertIsNotNone(MockRestApiServer(RestApiHandler, 'GET /patroni'))