A Citus cluster (coordinator and workers) will be stored in DCS as a fleet of Patroni clusters logically grouped together:

```
/service/batman/
/service/batman/0/
/service/batman/0/initialize
/service/batman/0/leader
/service/batman/0/members/
/service/batman/0/members/m1
/service/batman/0/members/m2
/service/batman/1/
/service/batman/1/initialize
/service/batman/1/leader
/service/batman/1/members/
/service/batman/1/members/m1
/service/batman/1/members/m2
...
```

Here 0 is the Citus group of the coordinator, and 1, 2, etc. are worker groups. Such a hierarchy allows reading the entire Citus cluster with a single call to the DCS (except Zookeeper). The `get_cluster()` method will read the entire Citus cluster on the coordinator, because it needs to discover workers; for a worker cluster it will read only the subtree of its own group. Besides that, we introduce a new method, `get_citus_coordinator()`, which will be used only by worker clusters.

Since there are no hierarchical structures on K8s, we will use the Citus group as a suffix on all objects that Patroni creates. E.g.:

```
batman-0-leader  # the leader config map for the coordinator
batman-0-config  # the config map holding initialize, config, and history "keys"
...
batman-1-leader  # the leader config map for worker group 1
batman-1-config
...
```

Citus integration is enabled from patroni.yaml:

```yaml
citus:
  database: citus
  group: 0  # 0 is for coordinator, 1, 2, etc are for workers
```

If enabled, Patroni will create the database and the citus extension in it, and will insert into `pg_dist_authinfo` the information required for Citus nodes to communicate with each other, i.e. 'password', 'sslcert', and 'sslkey' for the superuser, if they are defined in the Patroni configuration file.

When a new Citus coordinator/worker is bootstrapped, Patroni adds `synchronous_mode: on` to the `bootstrap.dcs` section. Besides that, Patroni takes over management of some Postgres GUCs:

- `shared_preload_libraries` - Patroni ensures that "citus" is added in the first position
- `max_prepared_transactions` - if not set or set to 0, Patroni changes the value to `max_connections*2`
- `wal_level` - automatically set to `logical`; it is used by Citus to move/split shards

Under the hood Citus creates/removes replication slots, and they are automatically added by Patroni to the `ignore_slots` configuration to avoid accidental removal.

The coordinator primary actively discovers worker primary nodes and registers/updates them in the `pg_dist_node` table using the `citus_add_node()` and `citus_update_node()` functions.

Patroni running on the coordinator provides a new REST API endpoint: `POST /citus`. It is used by workers to facilitate controlled switchovers and restarts of worker primaries. When a worker primary needs to shut down Postgres because of a restart or switchover, it calls the `POST /citus` endpoint on the coordinator; Patroni on the coordinator starts a transaction and calls `citus_update_node(nodeid, 'host-demoted', port)` in order to pause client connections that work with the given worker. Once the new leader is elected or Postgres is started back up, it performs another call to the `POST /citus` endpoint, which does another `citus_update_node()` call with the actual hostname and port and commits the transaction. After the transaction is committed, the coordinator re-establishes connections to the worker node and client connections are unblocked. If clients don't run long transactions, the operation finishes without client-visible errors, only with a short latency spike.
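To make the pause/resume dance concrete, here is a minimal sketch of the two coordinator-side steps, not Patroni's actual implementation. The `citus_update_node(nodeid, nodename, nodeport)` signature is the documented Citus UDF; the wrapper function names, hostnames, and psycopg2 plumbing are illustrative assumptions.

```python
# Hedged sketch of the coordinator-side pause/resume around a worker
# switchover; citus_update_node() is the real Citus UDF, everything else
# (function names, connection handling) is illustrative.
import psycopg2


def pause_worker(conn, nodeid, host, port):
    # Open a transaction and repoint the worker's metadata at a
    # non-resolvable name; Citus holds new client connections to this
    # worker until the transaction ends.
    cur = conn.cursor()
    cur.execute("SELECT citus_update_node(%s, %s, %s)",
                (nodeid, host + '-demoted', port))
    # The transaction is intentionally left open.


def resume_worker(conn, nodeid, new_host, new_port):
    # Repoint the metadata at the new primary and commit; the coordinator
    # re-establishes connections and the paused clients continue.
    cur = conn.cursor()
    cur.execute("SELECT citus_update_node(%s, %s, %s)",
                (nodeid, new_host, new_port))
    conn.commit()


if __name__ == '__main__':
    conn = psycopg2.connect(dbname='citus')  # coordinator primary (assumed DSN)
    pause_worker(conn, nodeid=2, host='worker-1', port=5432)
    # ... the worker restarts, or a new leader is promoted ...
    resume_worker(conn, nodeid=2, new_host='worker-1-new', new_port=5432)
```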
All operations on `pg_dist_node` are serialized by Patroni on the coordinator. This gives more control and makes it possible to ROLLBACK a transaction in progress if its lifetime exceeds a certain threshold while other worker nodes still need to be updated; a sketch of that logic follows.
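A rough sketch of what that serialization could look like, assuming a single lock guarding the coordinator connection and an assumed lifetime threshold for the paused transaction. The class, helper names, and the 30-second default are hypothetical, not Patroni's internals.

```python
# Hedged sketch: all pg_dist_node changes go through one lock, and a
# paused (uncommitted) transaction is rolled back once it outlives a
# threshold, so further updates are not blocked behind it.
import threading
import time


class CitusMetadata:
    def __init__(self, conn, txn_timeout=30.0):
        self._conn = conn                # connection to the coordinator primary
        self._lock = threading.Lock()    # serializes pg_dist_node access
        self._txn_timeout = txn_timeout  # assumed lifetime threshold, seconds
        self._txn_started = None         # start time of the open pause txn

    def update_node(self, nodeid, host, port, commit=True):
        with self._lock:
            self._rollback_if_stale()
            cur = self._conn.cursor()
            cur.execute("SELECT citus_update_node(%s, %s, %s)",
                        (nodeid, host, port))
            if commit:
                self._conn.commit()
                self._txn_started = None
            elif self._txn_started is None:
                # Leave the transaction open to keep clients paused,
                # but remember when it started.
                self._txn_started = time.time()

    def _rollback_if_stale(self):
        # Abort a pause transaction that has lived too long, so updates
        # for other workers are not delayed indefinitely.
        if (self._txn_started is not None
                and time.time() - self._txn_started > self._txn_timeout):
            self._conn.rollback()
            self._txn_started = None
```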
scope: batman
#namespace: /service/
name: postgresql1

restapi:
  listen: 127.0.0.1:8009
  connect_address: 127.0.0.1:8009
#  cafile: /etc/ssl/certs/ssl-cacert-snakeoil.pem
#  certfile: /etc/ssl/certs/ssl-cert-snakeoil.pem
#  keyfile: /etc/ssl/private/ssl-cert-snakeoil.key
#  authentication:
#    username: username
#    password: password

#ctl:
#  insecure: false  # Allow connections to Patroni REST API without verifying certificates
#  certfile: /etc/ssl/certs/ssl-cert-snakeoil.pem
#  keyfile: /etc/ssl/private/ssl-cert-snakeoil.key
#  cacert: /etc/ssl/certs/ssl-cacert-snakeoil.pem

#citus:
#  database: citus
#  group: 1  # worker

etcd:
  #Provide host to do the initial discovery of the cluster topology:
  host: 127.0.0.1:2379
  #Or use "hosts" to provide multiple endpoints
  #Could be a comma separated string:
  #hosts: host1:port1,host2:port2
  #or an actual yaml list:
  #hosts:
  #- host1:port1
  #- host2:port2
  #Once discovery is complete Patroni will use the list of advertised clientURLs
  #It is possible to change this behavior by setting:
  #use_proxies: true

#raft:
#  data_dir: .
#  self_addr: 127.0.0.1:2223
#  partner_addrs:
#  - 127.0.0.1:2222
#  - 127.0.0.1:2224

bootstrap:
  # this section will be written into Etcd:/<namespace>/<scope>/config after initializing a new cluster
  # and all other cluster members will use it as a `global configuration`
  dcs:
    ttl: 30
    loop_wait: 10
    retry_timeout: 10
    maximum_lag_on_failover: 1048576
    postgresql:
      use_pg_rewind: true
#      use_slots: true
      parameters:
#        wal_level: hot_standby
#        hot_standby: "on"
#        max_connections: 100
#        max_worker_processes: 8
#        wal_keep_segments: 8
#        max_wal_senders: 10
#        max_replication_slots: 10
#        max_prepared_transactions: 0
#        max_locks_per_transaction: 64
#        wal_log_hints: "on"
#        track_commit_timestamp: "off"
#        archive_mode: "on"
#        archive_timeout: 1800s
#        archive_command: mkdir -p ../wal_archive && test ! -f ../wal_archive/%f && cp %p ../wal_archive/%f
#      recovery_conf:
#        restore_command: cp ../wal_archive/%f %p

  # some desired options for 'initdb'
  initdb:  # Note: It needs to be a list (some options need values, others are switches)
  - encoding: UTF8
  - data-checksums

  pg_hba:  # Add the following lines to pg_hba.conf after running 'initdb'
  # For kerberos gss based connectivity (discard @.*$)
  #- host replication replicator 127.0.0.1/32 gss include_realm=0
  #- host all all 0.0.0.0/0 gss include_realm=0
  - host replication replicator 127.0.0.1/32 md5
  - host all all 0.0.0.0/0 md5
#  - hostssl all all 0.0.0.0/0 md5

  # Additional script to be launched after initial cluster creation (will be passed the connection URL as parameter)
#  post_init: /usr/local/bin/setup_cluster.sh

  # Some additional users that need to be created after initializing the new cluster
  users:
    admin:
      password: admin%
      options:
        - createrole
        - createdb

postgresql:
  listen: 127.0.0.1:5433
  connect_address: 127.0.0.1:5433
#  proxy_address: 127.0.0.1:5434  # The address of connection pool (e.g., pgbouncer) running next to Patroni/Postgres. Only for service discovery.
  data_dir: data/postgresql1
#  bin_dir:
#  config_dir:
  pgpass: /tmp/pgpass1
  authentication:
    replication:
      username: replicator
      password: rep-pass
    superuser:
      username: postgres
      password: zalando
    rewind:  # Has no effect on postgres 10 and lower
      username: rewind_user
      password: rewind_password
    # Server side kerberos spn
#    krbsrvname: postgres
  parameters:
    # Fully qualified kerberos ticket file for the running user
    # same as KRB5CCNAME used by the GSS
#    krb_server_keyfile: /var/spool/keytabs/postgres
    unix_socket_directories: '..'  # parent directory of data_dir
  basebackup:
    - verbose
    - max-rate: 100M
#    - waldir: /pg-wal-mount/external-waldir  # only needed in case pg_wal is symlinked outside of data_dir

  # Additional fencing script executed after acquiring the leader lock but before promoting the replica
#  pre_promote: /path/to/pre_promote.sh

tags:
  nofailover: false
  noloadbalance: false
  clonefrom: false