// patroni/docs/ha_loop_diagram.dot

// Graphviz source for ha_loop_diagram.png
// recompile with:
// dot -Tpng ha_loop_diagram.dot -o ha_loop_diagram.png

digraph G {
  // Graph-level settings: ranks flow top-to-bottom and the dot engine does the layout.
  rankdir = "TB";
  fontname = "sans-serif";
  penwidth = "0.3";
  layout = "dot";
  newrank = true;

  // Default attributes for every edge declared after this statement.
  edge [fontname="sans-serif", fontsize=12, color="black", fontcolor="black"];

  // Default attributes for every node declared after this statement.
  // style=filled is what makes the per-node fillcolor values visible.
  node [fontname="serif", fontsize=12, fillcolor="white", color="black", fontcolor="black", style="filled"];

  // Entry point of one loop iteration; it hands over to the first run_cycle step.
  "start" [label="Start", shape="rectangle", fillcolor="green"];
  "start" -> "load_cluster_from_dcs";

  // Last step of an iteration; the edge back to "start" closes the loop.
  "update_member" [label="Persist node state in DCS"];
  "update_member" -> "start";

  // Cluster "run_cycle": the main decision path of one loop iteration, from
  // loading the cluster state out of DCS up to the point where control moves
  // to one of the other clusters of this graph.
  // Labels are rendered verbatim into the image; re-run dot after editing them.
  subgraph cluster_run_cycle {
    label="run_cycle";

    // Registration check: make sure this node is known in DCS.
    "load_cluster_from_dcs" [label="Load cluster from DCS"];
    "touch_member" [label="Persist node in DCS"];
    "cluster.has_member" [shape="diamond", label="Is node registered on DCS?"];
    "cluster.has_member" -> "touch_member" [label="no", color="red"];
    "long_action_in_progress?" [shape="diamond", label="Is PostgreSQL currently being\nstopped/started/restarted/reinitialized?"];
    "load_cluster_from_dcs" -> "cluster.has_member";
    "touch_member" -> "long_action_in_progress?";
    "cluster.has_member" -> "long_action_in_progress?" [label="yes", color="green"];

    // No long action running: check for a failed recovery, then inspect the data directory.
    "long_action_in_progress?" -> "recovering?" [label="no", color="red"];
    "recovering?" [label="Was cluster recovering and failed?", shape="diamond"];
    "recovering?" -> "post_recover" [label="yes", color="green"];
    "recovering?" -> "data_directory_empty" [label="no", color="red"];
    "post_recover" [label="Remove leader key (if I was the leader)"];
    "data_directory_empty" [label="Is data folder empty?", shape="diamond"];
    "data_directory_empty" -> "cluster_initialize" [label="no", color="red"];
    "data_belongs_to_cluster" [label="Does data dir belong to cluster?", shape="diamond"];
    "data_belongs_to_cluster" -> "exit" [label="no", color="red"];
    "data_belongs_to_cluster" -> "is_healthy" [label="yes", color="green"];
    // Terminal node: it has no outgoing edge, so the loop does not continue from here.
    "exit" [label="Fail and exit", fillcolor=red];

    // Existing data directory: decide whether the cluster still has to be initialized in DCS.
    "cluster_initialize" [label="Is cluster initialized on DCS?", shape="diamond"];
    "cluster_initialize" -> "cluster.has_leader" [label="no", color="red"];
    "cluster.has_leader" [label="Does the cluster have a leader?", shape="diamond"];
    "cluster.has_leader" -> "dcs.initialize" [label="no", color="red"];
    "cluster.has_leader" -> "is_healthy" [label="yes", color="green"];
    "cluster_initialize" -> "data_belongs_to_cluster" [label="yes", color="green"];
    "dcs.initialize" [label="Initialize new cluster"];
    "dcs.initialize" -> "is_healthy";

    // Health check of the local Postgres, then the leader check that picks the next cluster.
    "is_healthy" [label="Is node healthy?\n(running Postgres)", shape="diamond"];
    "recover" [label="Start as read-only\nand set Recover flag"];
    "is_healthy" -> "recover" [label="no", color="red"];
    "is_healthy" -> "cluster.is_unlocked" [label="yes", color="green"];
    // The node ID says "is_unlocked" but the question is phrased positively
    // ("have a leader?"); the yes/no edge labels below follow the label text.
    "cluster.is_unlocked" [label="Does the cluster have a leader?", shape="diamond"];
  }

  // Edges leaving run_cycle. They are declared at graph level, presumably so
  // that their target nodes are not pulled into the run_cycle cluster.
  "post_recover" -> "update_member";
  "recover" -> "update_member";
  "long_action_in_progress?" -> "async_has_lock?" [label="yes", color="green"];
  "cluster.is_unlocked" -> "unhealthy_is_healthiest" [label="no", color="red"];
  "cluster.is_unlocked" -> "healthy_has_lock" [label="yes", color="green"];
  "data_directory_empty" -> "bootstrap.is_unlocked" [label="yes", color="green"];

  // Cluster for the case where a long-running action is in progress:
  // the only decision is whether the leader lock has to be renewed.
  subgraph cluster_async {
    label="Long action in progress\n(Start/Stop/Restart/Reinitialize)";

    "async_has_lock?" [shape="diamond", label="Do I have the leader lock?"];
    "async_update_lock" [label="Renew leader lock"];
    "async_has_lock?" -> "async_update_lock" [color="green", label="yes"];
  }

  // Both outcomes end the iteration at the shared "update_member" step.
  "async_update_lock" -> "update_member";
  "async_has_lock?" -> "update_member" [color="red", label="no"];

  // Cluster "Node bootstrap": entered from "data_directory_empty" when the data
  // folder is empty. The node either copies data from an existing member,
  // races to initialize a brand-new cluster, or waits for a leader to appear.
  // Labels are rendered verbatim into the image; re-run dot after editing them.
  subgraph cluster_bootstrap {
    label="Node bootstrap";

    // Node IDs say "is_unlocked"/"is_initialized" but the questions are phrased
    // positively; the yes/no edge labels follow the label text.
    "bootstrap.is_unlocked" [label="Does the cluster have a leader?", shape="diamond"];
    "bootstrap.is_initialized" [label="Does the cluster have an initialize key?", shape="diamond"];
    "bootstrap.is_unlocked" -> "bootstrap.is_initialized" [label="no", color="red"];
    "bootstrap.is_unlocked" -> "bootstrap.select_node" [label="yes", color="green"];

    // A leader exists: build this node from a backup of another member.
    "bootstrap.select_node" [label="Select a node to take a backup from"];
    "bootstrap.do_bootstrap" [label="Run pg_basebackup\n(async)"];
    "bootstrap.select_node" -> "bootstrap.do_bootstrap";

    // No leader: either wait for one or compete for the initialize key.
    "bootstrap.is_initialized" -> "bootstrap.initialization_race" [label="no", color="red"];
    "bootstrap.is_initialized" -> "bootstrap.wait_for_leader" [label="yes", color="green"];
    "bootstrap.initialization_race" [label="Race for initialize key"];
    "bootstrap.initialization_race" -> "bootstrap.won_initialize_race?";
    "bootstrap.won_initialize_race?" [label="Did I win the initialize race?", shape="diamond"];
    "bootstrap.won_initialize_race?" -> "bootstrap.initdb_and_start" [label="yes", color="green"];
    "bootstrap.won_initialize_race?" -> "bootstrap.wait_for_leader" [label="no", color="red"];
    "bootstrap.wait_for_leader" [label="Need to wait for leader key"];

    // Race won: initialize the new cluster, cleaning up again if that fails.
    "bootstrap.initdb_and_start" [label="Run initdb, start postgres and create roles"];
    "bootstrap.initdb_and_start" -> "bootstrap.success?";
    "bootstrap.success?" [label="Success?", shape="diamond"];
    "bootstrap.success?" -> "bootstrap.take_leader_key" [label="yes", color="green"];
    "bootstrap.success?" -> "bootstrap.clean" [label="no", color="red"];
    "bootstrap.clean" [label="Remove initialize key from DCS\nand data directory from filesystem"];
    "bootstrap.take_leader_key" [label="Take a leader key in DCS"];
  }

  // Every bootstrap outcome ends the iteration at the shared "update_member" step.
  "bootstrap.do_bootstrap" -> "update_member";
  "bootstrap.wait_for_leader" -> "update_member";
  "bootstrap.clean" -> "update_member";
  "bootstrap.take_leader_key" -> "update_member";

  // Cluster "process_healthy_cluster": entered at "healthy_has_lock". The lock
  // owner renews the lock and promotes or demotes as needed; any other node
  // follows the leader.
  subgraph cluster_process_healthy_cluster {
    label="process_healthy_cluster";

    "healthy_has_lock" [shape="diamond", label="Am I the owner of the leader lock?"];
    "healthy_is_leader" [shape="diamond", label="Is Postgres running as primary?"];
    "healthy_no_lock" [label="Follow the leader (async,\ncreate/update recovery.conf and restart if necessary)"];
    "healthy_has_lock" -> "healthy_no_lock" [color="red", label="no"];
    "healthy_has_lock" -> "healthy_update_leader_lock" [color="green", label="yes"];

    // Lock owner: renewing the lock decides between staying primary and demoting.
    "healthy_update_leader_lock" [label="Try to update leader lock"];
    "healthy_update_leader_lock" -> "healthy_update_success";
    "healthy_update_success" [shape="diamond", label="Success?"];
    "healthy_update_success" -> "healthy_is_leader" [color="green", label="yes"];
    "healthy_update_success" -> "healthy_demote" [color="red", label="no"];
    "healthy_demote" [label="Demote (async,\nrestart in read-only)"];
    "healthy_failover" [label="Promote Postgres to primary"];
    "healthy_is_leader" -> "healthy_failover" [color="red", label="no"];
  }

  // All outcomes end the iteration at the shared "update_member" step.
  "healthy_demote" -> "update_member";
  "healthy_is_leader" -> "update_member" [color="green", label="yes"];
  "healthy_failover" -> "update_member";
  "healthy_no_lock" -> "update_member";

  // Cluster "process_unhealthy_cluster": entered at "unhealthy_is_healthiest".
  // The healthiest node tries to create the leader key and promotes itself if
  // it gets the lock; every other path ends in following another node.
  // Labels are rendered verbatim into the image; re-run dot after editing them.
  subgraph cluster_process_unhealthy_cluster {
    label="process_unhealthy_cluster";

    "unhealthy_is_healthiest" [label="Am I the healthiest node?", shape="diamond"];
    "unhealthy_is_healthiest" -> "unhealthy_leader_race" [label="yes", color="green"];
    "unhealthy_leader_race" [label="Try to create leader key"];
    "unhealthy_leader_race" -> "unhealthy_acquire_lock";
    "unhealthy_acquire_lock" [label="Was I able to get the lock?", shape="diamond"];
    "unhealthy_is_leader" [label="Is Postgres running as primary?", shape="diamond"];
    "unhealthy_acquire_lock" -> "unhealthy_is_leader" [label="yes", color="green"];
    "unhealthy_is_leader" -> "unhealthy_promote" [label="no", color="red"];
    "unhealthy_promote" [label="Promote to primary"];

    // Not the healthiest node, or the lock was not acquired: follow another node.
    "unhealthy_is_healthiest" -> "unhealthy_follow" [label="no", color="red"];
    "unhealthy_follow" [label="Try to follow somebody else"];
    "unhealthy_acquire_lock" -> "unhealthy_follow" [label="no", color="red"];
  }

  // All outcomes end the iteration at the shared "update_member" step.
  "unhealthy_follow" -> "update_member";
  "unhealthy_promote" -> "update_member";
  "unhealthy_is_leader" -> "update_member" [label="yes", color="green"];
}