mirror of
https://github.com/outbackdingo/patroni.git
synced 2026-01-27 10:20:10 +00:00
Create readiness and liveness endpoints (#1590)
They can be useful for eliminating "unhealthy" pods from the subset addresses when a K8s service with label selectors is used. Real-life example: the node where the primary was running has failed and is being shut down, so Patroni can't update (remove) the role label. As a result, on OpenShift the leader service will have two pods assigned, one of which is the failed primary. With the readiness probe defined, the failed primary pod will be excluded from the list.
This commit is contained in:
committed by
GitHub
parent
7a13579973
commit
db8c634db3
@@ -29,6 +29,36 @@ For all health check ``GET`` requests Patroni returns a JSON document with the s
|
||||
|
||||
- ``GET /health``: returns HTTP status code **200** only when PostgreSQL is up and running.
|
||||
|
||||
- ``GET /liveness``: always returns HTTP status code **200**, which only indicates that Patroni is running. Could be used for ``livenessProbe``.
|
||||
|
||||
- ``GET /readiness``: returns HTTP status code **200** when the Patroni node is running as the leader or when PostgreSQL is up and running. The endpoint could be used for ``readinessProbe`` when it is not possible to use Kubernetes endpoints for leader elections (OpenShift).
|
||||
|
||||
Both the ``readiness`` and ``liveness`` endpoints are very lightweight and do not execute any SQL. Probes should be configured in such a way that they start failing at about the time when the leader key expires. With the default value of ``ttl``, which is ``30s``, example probes would look like:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
scheme: HTTP
|
||||
path: /readiness
|
||||
port: 8008
|
||||
initialDelaySeconds: 3
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
scheme: HTTP
|
||||
path: /liveness
|
||||
port: 8008
|
||||
initialDelaySeconds: 3
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
|
||||
|
||||
Monitoring endpoint
|
||||
-------------------
|
||||
|
||||
|
||||
@@ -33,7 +33,5 @@ postgresql:
|
||||
__EOF__
|
||||
|
||||
unset PATRONI_SUPERUSER_PASSWORD PATRONI_REPLICATION_PASSWORD
|
||||
export KUBERNETES_NAMESPACE=$PATRONI_KUBERNETES_NAMESPACE
|
||||
export POD_NAME=$PATRONI_NAME
|
||||
|
||||
exec /usr/bin/python3 /usr/local/bin/patroni /home/postgres/patroni.yml
|
||||
exec /usr/bin/python3 /usr/local/bin/patroni /home/postgres/patroni.yml
|
||||
|
||||
@@ -152,6 +152,16 @@ objects:
|
||||
image: docker-registry.default.svc:5000/${NAMESPACE}/patroni:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: ${APPLICATION_NAME}
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
scheme: HTTP
|
||||
path: /readiness
|
||||
port: 8008
|
||||
initialDelaySeconds: 3
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
ports:
|
||||
- containerPort: 8008
|
||||
protocol: TCP
|
||||
|
||||
@@ -5,11 +5,9 @@ metadata:
|
||||
annotations:
|
||||
description: |-
|
||||
Patroni Postgresql database cluster, with persistent storage.
|
||||
|
||||
WARNING: Any data stored will be lost upon pod destruction. Only use this template for testing.
|
||||
iconClass: icon-postgresql
|
||||
openshift.io/display-name: Patroni Postgresql (Persistent)
|
||||
openshift.io/long-description: This template deploys a a patroni postgresql HA cluster without persistent storage.
|
||||
openshift.io/long-description: This template deploys a patroni postgresql HA cluster with persistent storage.
|
||||
tags: postgresql
|
||||
objects:
|
||||
- apiVersion: v1
|
||||
@@ -166,6 +164,16 @@ objects:
|
||||
image: docker-registry.default.svc:5000/${NAMESPACE}/patroni:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: ${APPLICATION_NAME}
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
scheme: HTTP
|
||||
path: /readiness
|
||||
port: 8008
|
||||
initialDelaySeconds: 3
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
ports:
|
||||
- containerPort: 8008
|
||||
protocol: TCP
|
||||
@@ -314,4 +322,4 @@ parameters:
|
||||
- description: The size of the persistent volume to create.
|
||||
displayName: Persistent Volume Size
|
||||
name: PVC_SIZE
|
||||
value: 5Gi
|
||||
value: 5Gi
|
||||
|
||||
@@ -10,7 +10,7 @@ spec:
|
||||
clusterIP: None
|
||||
|
||||
---
|
||||
apiVersion: apps/v1beta1
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: &cluster_name patronidemo
|
||||
@@ -31,6 +31,16 @@ spec:
|
||||
- name: *cluster_name
|
||||
image: patroni # docker build -t patroni .
|
||||
imagePullPolicy: IfNotPresent
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
scheme: HTTP
|
||||
path: /readiness
|
||||
port: 8008
|
||||
initialDelaySeconds: 3
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
ports:
|
||||
- containerPort: 8008
|
||||
protocol: TCP
|
||||
@@ -123,6 +133,25 @@ spec:
|
||||
- port: 5432
|
||||
targetPort: 5432
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: patronidemo-repl
|
||||
labels:
|
||||
application: patroni
|
||||
cluster-name: &cluster_name patronidemo
|
||||
role: replica
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
application: patroni
|
||||
cluster-name: *cluster_name
|
||||
role: replica
|
||||
ports:
|
||||
- port: 5432
|
||||
targetPort: 5432
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
|
||||
@@ -24,6 +24,11 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
class RestApiHandler(BaseHTTPRequestHandler):
|
||||
|
||||
def _write_status_code_only(self, status_code):
|
||||
message = self.responses[status_code][0]
|
||||
self.wfile.write('{0} {1} {2}\r\n\r\n'.format(self.protocol_version, status_code, message).encode('utf-8'))
|
||||
self.log_request(status_code)
|
||||
|
||||
def _write_response(self, status_code, body, content_type='text/html', headers=None):
|
||||
self.send_response(status_code)
|
||||
headers = headers or {}
|
||||
@@ -81,9 +86,6 @@ class RestApiHandler(BaseHTTPRequestHandler):
|
||||
def do_GET(self, write_status_code_only=False):
|
||||
"""Default method for processing all GET requests which can not be routed to other methods"""
|
||||
|
||||
time_start = time.time()
|
||||
request_type = 'OPTIONS' if write_status_code_only else 'GET'
|
||||
|
||||
path = '/master' if self.path == '/' else self.path
|
||||
response = self.get_postgresql_status()
|
||||
|
||||
@@ -127,18 +129,26 @@ class RestApiHandler(BaseHTTPRequestHandler):
|
||||
status_code = replica_status_code
|
||||
|
||||
if write_status_code_only: # when haproxy sends OPTIONS request it reads only status code and nothing more
|
||||
message = self.responses[status_code][0]
|
||||
self.wfile.write('{0} {1} {2}\r\n\r\n'.format(self.protocol_version, status_code, message).encode('utf-8'))
|
||||
self._write_status_code_only(status_code)
|
||||
else:
|
||||
self._write_status_response(status_code, response)
|
||||
|
||||
time_end = time.time()
|
||||
self.log_message('%s %s %s latency: %s ms', request_type, path,
|
||||
status_code, (time_end - time_start) * 1000)
|
||||
|
||||
def do_OPTIONS(self):
|
||||
self.do_GET(write_status_code_only=True)
|
||||
|
||||
def do_GET_liveness(self):
|
||||
self._write_status_code_only(200)
|
||||
|
||||
def do_GET_readiness(self):
|
||||
patroni = self.server.patroni
|
||||
if patroni.ha.is_leader():
|
||||
status_code = 200
|
||||
elif patroni.postgresql.state == 'running':
|
||||
status_code = 200 if patroni.dcs.cluster else 503
|
||||
else:
|
||||
status_code = 503
|
||||
self._write_status_code_only(status_code)
|
||||
|
||||
def do_GET_patroni(self):
|
||||
response = self.get_postgresql_status(True)
|
||||
self._write_status_response(200, response)
|
||||
@@ -492,8 +502,13 @@ class RestApiHandler(BaseHTTPRequestHandler):
|
||||
state = 'unknown'
|
||||
return {'state': state, 'role': postgresql.role}
|
||||
|
||||
def handle_one_request(self):
|
||||
self.__start_time = time.time()
|
||||
BaseHTTPRequestHandler.handle_one_request(self)
|
||||
|
||||
def log_message(self, fmt, *args):
|
||||
logger.debug("API thread: %s - - [%s] %s", self.client_address[0], self.log_date_time_string(), fmt % args)
|
||||
latency = 1000.0 * (time.time() - self.__start_time)
|
||||
logger.debug("API thread: %s - - %s latency: %0.3f ms", self.client_address[0], fmt % args, latency)
|
||||
|
||||
|
||||
class RestApiServer(ThreadingMixIn, HTTPServer, Thread):
|
||||
|
||||
@@ -177,10 +177,10 @@ class TestRestApiHandler(unittest.TestCase):
|
||||
with patch.object(RestApiHandler, 'get_postgresql_status', Mock(return_value={'role': 'replica'})):
|
||||
MockPatroni.dcs.cluster.sync.sync_standby = ''
|
||||
MockRestApiServer(RestApiHandler, 'GET /asynchronous')
|
||||
MockPatroni.ha.is_leader = Mock(return_value=True)
|
||||
MockRestApiServer(RestApiHandler, 'GET /replica')
|
||||
with patch.object(MockHa, 'is_standby_cluster', Mock(return_value=True)):
|
||||
MockRestApiServer(RestApiHandler, 'GET /standby_leader')
|
||||
with patch.object(MockHa, 'is_leader', Mock(return_value=True)):
|
||||
MockRestApiServer(RestApiHandler, 'GET /replica')
|
||||
with patch.object(MockHa, 'is_standby_cluster', Mock(return_value=True)):
|
||||
MockRestApiServer(RestApiHandler, 'GET /standby_leader')
|
||||
MockPatroni.dcs.cluster = None
|
||||
with patch.object(RestApiHandler, 'get_postgresql_status', Mock(return_value={'role': 'master'})):
|
||||
MockRestApiServer(RestApiHandler, 'GET /master')
|
||||
@@ -195,6 +195,16 @@ class TestRestApiHandler(unittest.TestCase):
|
||||
def test_do_OPTIONS(self):
|
||||
self.assertIsNotNone(MockRestApiServer(RestApiHandler, 'OPTIONS / HTTP/1.0'))
|
||||
|
||||
def test_do_GET_liveness(self):
|
||||
self.assertIsNotNone(MockRestApiServer(RestApiHandler, 'GET /liveness HTTP/1.0'))
|
||||
|
||||
def test_do_GET_readiness(self):
|
||||
self.assertIsNotNone(MockRestApiServer(RestApiHandler, 'GET /readiness HTTP/1.0'))
|
||||
with patch.object(MockHa, 'is_leader', Mock(return_value=True)):
|
||||
self.assertIsNotNone(MockRestApiServer(RestApiHandler, 'GET /readiness HTTP/1.0'))
|
||||
with patch.object(MockPostgresql, 'state', PropertyMock(return_value='stopped')):
|
||||
self.assertIsNotNone(MockRestApiServer(RestApiHandler, 'GET /readiness HTTP/1.0'))
|
||||
|
||||
@patch.object(MockPostgresql, 'state', PropertyMock(return_value='stopped'))
|
||||
def test_do_GET_patroni(self):
|
||||
self.assertIsNotNone(MockRestApiServer(RestApiHandler, 'GET /patroni'))
|
||||
|
||||
Reference in New Issue
Block a user