mirror of
https://github.com/outbackdingo/cozystack.git
synced 2026-01-27 18:18:41 +00:00
@@ -8,30 +8,6 @@ terminate() {
|
||||
|
||||
trap terminate SIGINT SIGQUIT SIGTERM
|
||||
|
||||
# Print a JSON array of {name, peer-node-id} objects, one for every DRBD
# connection that is in the "Connecting" state on a resource that is not
# suspended.
#
# Output (stdout): a sorted, de-duplicated JSON array. Prints '[]' when no
# connection matches, and also when `drbdsetup status --json` produced no
# usable output, so callers can always parse the result as JSON.
function get_drbd_connecting() {
  local all_devices unhealthy_devices
  all_devices="$(drbdsetup status --json 2>/dev/null)"
  unhealthy_devices="$(echo "${all_devices}" | jq -r '
    map(
      select(
        # Skip devices that were suspended for io errors, reconnect will not help here
        .suspended == false and
        # Uncomment to select Secondary devices only
        # .role == "Secondary" and
        # any/2 selects each device once, however many connections match
        any(.connections[]; ."connection-state" == "Connecting")
      )
      | {
        name: .name,
        "peer-node-id": .connections[]
          | select(.["connection-state"] == "Connecting")
          | ."peer-node-id"
      }
    )
    # sort and de-duplicate, required for array intersection calculation later
    | unique
  ')"
  # jq prints nothing when its input is empty or unparsable; fall back to an
  # empty array instead of an empty string.
  echo "${unhealthy_devices:-[]}"
}
|
||||
|
||||
echo "Starting Linstor per-satellite plunger"
|
||||
|
||||
while true; do
|
||||
@@ -63,40 +39,4 @@ while true; do
|
||||
drbdadm up "${secondary}" || echo "Command failed"
|
||||
); done
|
||||
|
||||
# Detect devices that lost connection and can be simply reconnected.
# This may be fixed in DRBD 9.2.13,
# see https://github.com/LINBIT/drbd/blob/drbd-9.2/ChangeLog
connecting_devices1="$(get_drbd_connecting)"
# An empty snapshot means the status query returned nothing; treat it like '[]'.
if [ -n "${connecting_devices1}" ] && [ "${connecting_devices1}" != '[]' ]; then

  # Wait 1 second before checking again, to avoid false positives.
  # NOTE(review): the sleep is backgrounded and waited on, presumably so the
  # signal trap can interrupt the pause promptly - confirm.
  sleep 1 &
  pid=$!
  wait $pid

  # and check again
  connecting_devices2="$(get_drbd_connecting)"
  # Keep the second snapshot parseable by fromjson even if the query failed.
  [ -n "${connecting_devices2}" ] || connecting_devices2='[]'

  export connecting_devices1 connecting_devices2
  stuck_connecting="$(jq -rn '
    env.connecting_devices1 | fromjson as $l1
    | env.connecting_devices2 | fromjson as $l2
    # calculate the intersection: drop from $l1 everything missing in $l2,
    # so only connections seen as Connecting in BOTH snapshots remain
    | $l1 - ($l1 - $l2)
    | .[]
    # output as strings: "<resource name> <peer-node-id>"
    | (.name) + " " + (."peer-node-id" | tostring)
  ')"

  while IFS= read -r path; do
    # The here-string yields one empty line when nothing is stuck; skip it
    # rather than calling drbdsetup without arguments.
    [ -n "${path}" ] || continue
    (
      echo "Trying to reconnect secondary volume ${path}"
      set -x
      # ${path} is intentionally unquoted: it splits into name and peer-node-id
      # shellcheck disable=SC2086
      drbdsetup disconnect ${path} || echo "Command failed"
      # shellcheck disable=SC2086
      drbdsetup connect ${path} || echo "Command failed"
    )
  done <<< "$stuck_connecting"

fi
|
||||
|
||||
done
|
||||
|
||||
@@ -13,6 +13,15 @@ spec:
|
||||
certManager:
|
||||
name: linstor-api-ca
|
||||
kind: Issuer
|
||||
properties:
|
||||
- name: DrbdOptions/Net/connect-int
|
||||
value: "15"
|
||||
- name: DrbdOptions/Net/ping-int
|
||||
value: "15"
|
||||
- name: DrbdOptions/Net/ping-timeout
|
||||
value: "20"
|
||||
- name: DrbdOptions/Net/timeout
|
||||
value: "100"
|
||||
controller:
|
||||
enabled: true
|
||||
podTemplate:
|
||||
|
||||
Reference in New Issue
Block a user