add some linstor fixes

Signed-off-by: Andrei Kvapil <kvapss@gmail.com>
This commit is contained in:
Andrei Kvapil
2025-06-23 23:54:57 +02:00
parent c7be1a5572
commit 066ed77918
2 changed files with 9 additions and 60 deletions

View File

@@ -8,30 +8,6 @@ terminate() {
trap terminate SIGINT SIGQUIT SIGTERM
function get_drbd_connecting() {
all_devices="$(drbdsetup status --json 2>/dev/null)"
unhealthy_devices="$(echo "${all_devices}" | jq -r '
map(
select(
# Skip devices that were suspended for io errors, reconnect will not help here
.suspended == false and
# Uncomment to select Secondary devices only
# .role == "Secondary" and
(.connections[]."connection-state" == "Connecting")
)
| {
name: .name,
"peer-node-id": .connections[]
| select(.["connection-state"] == "Connecting")
| ."peer-node-id"
}
)
# redundant, but required for array intersection calculation later
| unique
')"
echo "${unhealthy_devices}"
}
echo "Starting Linstor per-satellite plunger"
while true; do
@@ -63,40 +39,4 @@ while true; do
drbdadm up "${secondary}" || echo "Command failed"
); done
# Detect devices that lost connection and can be simply reconnected
# This may be fixed in DRBD 9.2.13
# see https://github.com/LINBIT/drbd/blob/drbd-9.2/ChangeLog
connecting_devices1="$(get_drbd_connecting)"
if [ "${connecting_devices1}" != '[]' ]; then
# wait 10 seconds to avoid false positives
sleep 1 &
pid=$!
wait $pid
# and check again
connecting_devices2="$(get_drbd_connecting)"
export connecting_devices1 connecting_devices2
stuck_connecting="$(jq -rn '
env.connecting_devices1 | fromjson as $l1
| env.connecting_devices2 | fromjson as $l2
# calculate the intersection
| $l1 - ($l2 - $l1)
| .[]
# output as strings
| (.name) + " " + (."peer-node-id" | tostring)
')"
while IFS= read -r path; do (
echo "Trying to reconnect secondary volume ${path}"
set -x
# shellcheck disable=SC2086
drbdsetup disconnect ${path} || echo "Command failed"
# shellcheck disable=SC2086
drbdsetup connect ${path} || echo "Command failed"
) done <<< "$stuck_connecting"
fi
done

View File

@@ -13,6 +13,15 @@ spec:
certManager:
name: linstor-api-ca
kind: Issuer
properties:
- name: DrbdOptions/Net/connect-int
value: "15"
- name: DrbdOptions/Net/ping-int
value: "15"
- name: DrbdOptions/Net/ping-timeout
value: "20"
- name: DrbdOptions/Net/timeout
value: "100"
controller:
enabled: true
podTemplate: