Files
patroni/docs/ha_loop_diagram.dot
Alexander Kukushkin b470ade20e Change master->primary, take two (#3127)
This commit is a breaking change:
1. `role` in DCS is written as "primary" instead of "master".
2. `role` in REST API responses is also written as "primary".
3. REST API no longer accepts role=master in requests (for example switchover/failover/restart endpoints).
4. `/metrics` REST API endpoint will no longer report `patroni_master`.
5. `patronictl` no longer accepts `--master` argument.
6. `no_master` option in declarative configuration of custom replica creation methods is no longer treated as a special option, please use `no_leader` instead.
7. `patroni_wale_restore` doesn't accept `--no_master` anymore.
8. `patroni_barman` doesn't accept `--role=master` anymore.
9. Callback scripts will be executed with `role=primary` instead of `role=master`.
10. On Kubernetes, Patroni will by default set the role label to `primary`. If you want to keep the old behavior and avoid downtime or lengthy, complex migrations, you can configure `kubernetes.leader_label_value` and `kubernetes.standby_leader_label_value` to `master`.

However, a few exceptions regarding master are still in place:
1. `GET /master` REST API endpoint will continue to work.
2. `master_start_timeout` and `master_stop_timeout` in global configuration are still accepted.
3. `master` tag is still preserved in Consul services in addition to `primary`.

Rationale for these exceptions: a DBA doesn't always have full control over the infrastructure and can't always adjust the configuration.
2024-08-28 17:19:00 +02:00

149 lines
8.0 KiB
Plaintext

// Graphviz source for ha_loop_diagram.png
// recompile with:
// dot -Tpng ha_loop_diagram.dot -o ha_loop_diagram.png
digraph G {
// Graph-wide settings: top-to-bottom ranks, rendered with the "dot" engine.
rankdir = TB;
fontname = "sans-serif";
penwidth = "0.3";
layout = "dot";
newrank = true;

// Defaults inherited by every edge declared after this point.
edge [
    fontname = "sans-serif",
    fontsize = 12,
    color = black,
    fontcolor = black
];

// Defaults inherited by every node declared after this point.
node [
    fontname = serif,
    fontsize = 12,
    fillcolor = white,
    color = black,
    fontcolor = black,
    style = filled
];

// Entry node of the loop; it leads straight to loading the cluster from DCS.
"start" [label = "Start", shape = rectangle, fillcolor = green];
"start" -> "load_cluster_from_dcs";

// Closes the loop: once the node state is persisted, flow returns to "start".
"update_member" [label = "Persist node state in DCS"];
"update_member" -> "start";
// Main decision tree of one loop iteration. Statement order is kept as-is
// because dot's layout can depend on the order nodes and edges are declared.
subgraph cluster_run_cycle {
    label="run_cycle"

    // Load the cluster view and make sure this node is registered in DCS.
    "load_cluster_from_dcs" [label="Load cluster from DCS"];
    "touch_member" [label="Persist node in DCS"];
    "cluster.has_member" [shape="diamond", label="Is node registered on DCS?"];
    "cluster.has_member" -> "touch_member" [label="no", color="red"];
    "long_action_in_progress?" [shape="diamond", label="Is PostgreSQL currently\nstopping/starting/restarting/reinitializing?"];
    "load_cluster_from_dcs" -> "cluster.has_member";
    "touch_member" -> "long_action_in_progress?";
    "cluster.has_member" -> "long_action_in_progress?" [label="yes", color="green"];

    // No long-running action: check for a failed recovery first.
    "long_action_in_progress?" -> "recovering?" [label="no", color="red"];
    "recovering?" [label="Was cluster recovering and failed?", shape="diamond"];
    "recovering?" -> "post_recover" [label="yes", color="green"];
    "recovering?" -> "data_directory_empty" [label="no", color="red"];
    "post_recover" [label="Remove leader key (if I was the leader)"];

    // Inspect the local data directory and the cluster's initialization state.
    "data_directory_empty" [label="Is data folder empty?", shape="diamond"];
    "data_directory_empty" -> "cluster_initialize" [label="no", color="red"];
    "data_belongs_to_cluster" [label="Does data dir belong to cluster?", shape="diamond"];
    "data_belongs_to_cluster" -> "exit" [label="no", color="red"];
    "data_belongs_to_cluster" -> "is_healthy" [label="yes", color="green"];
    "exit" [label="Fail and exit", fillcolor=red];
    "cluster_initialize" [label="Is cluster initialized on DCS?", shape="diamond"];
    "cluster_initialize" -> "cluster.has_leader" [label="no", color="red"];
    "cluster.has_leader" [label="Does the cluster have a leader?", shape="diamond"];
    "cluster.has_leader" -> "dcs.initialize" [label="no", color="red"];
    "cluster.has_leader" -> "is_healthy" [label="yes", color="green"];
    "cluster_initialize" -> "data_belongs_to_cluster" [label="yes", color="green"];
    "dcs.initialize" [label="Initialize new cluster"];
    "dcs.initialize" -> "is_healthy";

    // Health check of the local node, then the leader check that selects
    // the healthy-cluster or unhealthy-cluster branch.
    "is_healthy" [label="Is node healthy?\n(running Postgres)", shape="diamond"];
    "recover" [label="Start as read-only\nand set Recover flag"];
    "is_healthy" -> "recover" [label="no", color="red"];
    "is_healthy" -> "cluster.is_unlocked" [label="yes", color="green"];
    "cluster.is_unlocked" [label="Does the cluster have a leader?", shape="diamond"];
}

// Edges leaving run_cycle: either finish the cycle at "update_member" or
// hand over to the async, unhealthy, healthy, or bootstrap branch.
"post_recover" -> "update_member";
"recover" -> "update_member";
"long_action_in_progress?" -> "async_has_lock?" [label="yes", color="green"];
"cluster.is_unlocked" -> "unhealthy_is_healthiest" [label="no", color="red"];
"cluster.is_unlocked" -> "healthy_has_lock" [label="yes", color="green"];
"data_directory_empty" -> "bootstrap.is_unlocked" [label="yes", color="green"];
// Branch entered while a long-running action is in progress: the only
// decision is whether this node holds the leader lock and should renew it.
subgraph cluster_async {
    label = "Long action in progress\n(Start/Stop/Restart/Reinitialize)";

    "async_has_lock?" [
        label = "Do I have the leader lock?",
        shape = diamond
    ];
    "async_update_lock" [label = "Renew leader lock"];

    "async_has_lock?" -> "async_update_lock" [label = "yes", color = green];
}

// Both outcomes end the cycle at "update_member".
"async_update_lock" -> "update_member";
"async_has_lock?" -> "update_member" [label = "no", color = red];
// Branch entered when the local data directory is empty. Statement order is
// kept as-is because dot's layout can depend on declaration order.
subgraph cluster_bootstrap {
    label = "Node bootstrap";

    // With a leader present, copy data from an existing node; otherwise look
    // at the initialize key.
    "bootstrap.is_unlocked" [label="Does the cluster have a leader?", shape="diamond"];
    "bootstrap.is_initialized" [label="Does the cluster have an initialize key?", shape="diamond"];
    "bootstrap.is_unlocked" -> "bootstrap.is_initialized" [label="no", color="red"];
    "bootstrap.is_unlocked" -> "bootstrap.select_node" [label="yes", color="green"];
    "bootstrap.select_node" [label="Select a node to take a backup from"];
    "bootstrap.do_bootstrap" [label="Run pg_basebackup\n(async)"];
    "bootstrap.select_node" -> "bootstrap.do_bootstrap";

    // No leader: race for the initialize key, or wait for a leader to appear.
    "bootstrap.is_initialized" -> "bootstrap.initialization_race" [label="no", color="red"];
    "bootstrap.is_initialized" -> "bootstrap.wait_for_leader" [label="yes", color="green"];
    "bootstrap.initialization_race" [label="Race for initialize key"];
    "bootstrap.initialization_race" -> "bootstrap.won_initialize_race?";
    "bootstrap.won_initialize_race?" [label="Did I win the initialize race?", shape="diamond"];
    "bootstrap.won_initialize_race?" -> "bootstrap.initdb_and_start" [label="yes", color="green"];
    "bootstrap.won_initialize_race?" -> "bootstrap.wait_for_leader" [label="no", color="red"];
    "bootstrap.wait_for_leader" [label="Need to wait for leader key"];

    // Race winner initializes the cluster; on failure the attempt is cleaned up.
    "bootstrap.initdb_and_start" [label="Run initdb, start postgres and create roles"];
    "bootstrap.initdb_and_start" -> "bootstrap.success?";
    // Question mark added so this decision node reads like the other "Success?" diamond.
    "bootstrap.success?" [label="Success?", shape="diamond"];
    "bootstrap.success?" -> "bootstrap.take_leader_key" [label="yes", color="green"];
    "bootstrap.success?" -> "bootstrap.clean" [label="no", color="red"];
    "bootstrap.clean" [label="Remove initialize key from DCS\nand data directory from filesystem"];
    "bootstrap.take_leader_key" [label="Take a leader key in DCS"];
}

// Every terminal bootstrap step ends the cycle at "update_member".
"bootstrap.do_bootstrap" -> "update_member";
"bootstrap.wait_for_leader" -> "update_member";
"bootstrap.clean" -> "update_member";
"bootstrap.take_leader_key" -> "update_member";
// Branch entered when the cluster has a leader: the lock owner renews the
// lock (demoting if that fails), everyone else follows the leader.
subgraph cluster_process_healthy_cluster {
    label = "process_healthy_cluster";

    "healthy_has_lock" [
        label = "Am I the owner of the leader lock?",
        shape = diamond
    ];
    "healthy_is_leader" [
        label = "Is Postgres running as primary?",
        shape = diamond
    ];
    "healthy_no_lock" [
        label = "Follow the leader (async,\ncreate/update recovery.conf and restart if necessary)"
    ];
    "healthy_has_lock" -> "healthy_no_lock" [label = "no", color = red];
    "healthy_has_lock" -> "healthy_update_leader_lock" [label = "yes", color = green];

    "healthy_update_leader_lock" [label = "Try to update leader lock"];
    "healthy_update_leader_lock" -> "healthy_update_success";
    "healthy_update_success" [label = "Success?", shape = diamond];
    "healthy_update_success" -> "healthy_is_leader" [label = "yes", color = green];
    "healthy_update_success" -> "healthy_demote" [label = "no", color = red];

    "healthy_demote" [label = "Demote (async,\nrestart in read-only)"];
    "healthy_failover" [label = "Promote Postgres to primary"];
    "healthy_is_leader" -> "healthy_failover" [label = "no", color = red];
}

// Every terminal step of this branch ends the cycle at "update_member".
"healthy_demote" -> "update_member";
"healthy_is_leader" -> "update_member" [label = "yes", color = green];
"healthy_failover" -> "update_member";
"healthy_no_lock" -> "update_member";
// Branch entered when the cluster has no leader: the healthiest node races
// for the leader key, all other nodes try to follow someone else.
// Statement order is kept as-is because dot's layout can depend on it.
subgraph cluster_process_unhealthy_cluster {
    label = "process_unhealthy_cluster";

    "unhealthy_is_healthiest" [label="Am I the healthiest node?", shape="diamond"];
    "unhealthy_is_healthiest" -> "unhealthy_leader_race" [label="yes", color="green"];
    "unhealthy_leader_race" [label="Try to create leader key"];
    "unhealthy_leader_race" -> "unhealthy_acquire_lock";
    "unhealthy_acquire_lock" [label="Was I able to get the lock?", shape="diamond"];
    "unhealthy_is_leader" [label="Is Postgres running as primary?", shape=diamond];
    "unhealthy_acquire_lock" -> "unhealthy_is_leader" [label="yes", color="green"];
    "unhealthy_is_leader" -> "unhealthy_promote" [label="no", color="red"];
    "unhealthy_promote" [label="Promote to primary"];
    "unhealthy_is_healthiest" -> "unhealthy_follow" [label="no", color="red"];
    // Label capitalised and stray "()" dropped to match the other action nodes.
    "unhealthy_follow" [label="Try to follow somebody else"];
    "unhealthy_acquire_lock" -> "unhealthy_follow" [label="no", color="red"];
}

// Every terminal step of this branch ends the cycle at "update_member".
"unhealthy_follow" -> "update_member";
"unhealthy_promote" -> "update_member";
"unhealthy_is_leader" -> "update_member" [label="yes", color="green"];
}