diff --git a/packages/system/linstor/hack/plunger/plunger-satellite.sh b/packages/system/linstor/hack/plunger/plunger-satellite.sh index 3d450f29..688cfd65 100755 --- a/packages/system/linstor/hack/plunger/plunger-satellite.sh +++ b/packages/system/linstor/hack/plunger/plunger-satellite.sh @@ -8,30 +8,6 @@ terminate() { trap terminate SIGINT SIGQUIT SIGTERM -function get_drbd_connecting() { - all_devices="$(drbdsetup status --json 2>/dev/null)" - unhealthy_devices="$(echo "${all_devices}" | jq -r ' - map( - select( - # Skip devices that were suspended for io errors, reconnect will not help here - .suspended == false and - # Uncomment to select Secondary devices only - # .role == "Secondary" and - (.connections[]."connection-state" == "Connecting") - ) - | { - name: .name, - "peer-node-id": .connections[] - | select(.["connection-state"] == "Connecting") - | ."peer-node-id" - } - ) - # redundant, but required for array intersection calculation later - | unique - ')" - echo "${unhealthy_devices}" -} - echo "Starting Linstor per-satellite plunger" while true; do @@ -63,40 +39,4 @@ while true; do drbdadm up "${secondary}" || echo "Command failed" ); done - # Detect devices that lost connection and can be simply reconnected - # This may be fixed in DRBD 9.2.13 - # see https://github.com/LINBIT/drbd/blob/drbd-9.2/ChangeLog - connecting_devices1="$(get_drbd_connecting)" - if [ "${connecting_devices1}" != '[]' ]; then - - # wait 10 seconds to avoid false positives - sleep 1 & - pid=$! - wait $pid - - # and check again - connecting_devices2="$(get_drbd_connecting)" - - export connecting_devices1 connecting_devices2 - stuck_connecting="$(jq -rn ' - env.connecting_devices1 | fromjson as $l1 - | env.connecting_devices2 | fromjson as $l2 - # calculate the intersection - | $l1 - ($l2 - $l1) - | .[] - # output as strings - | (.name) + " " + (."peer-node-id" | tostring) - ')" - - while IFS= read -r path; do ( - echo "Trying to reconnect secondary volume ${path}" - set -x - # shellcheck disable=SC2086 - drbdsetup disconnect ${path} || echo "Command failed" - # shellcheck disable=SC2086 - drbdsetup connect ${path} || echo "Command failed" - ) done <<< "$stuck_connecting" - - fi - done diff --git a/packages/system/linstor/templates/cluster.yaml b/packages/system/linstor/templates/cluster.yaml index e3f8bb80..68af90fd 100644 --- a/packages/system/linstor/templates/cluster.yaml +++ b/packages/system/linstor/templates/cluster.yaml @@ -13,6 +13,15 @@ spec: certManager: name: linstor-api-ca kind: Issuer + properties: + - name: DrbdOptions/Net/connect-int + value: "15" + - name: DrbdOptions/Net/ping-int + value: "15" + - name: DrbdOptions/Net/ping-timeout + value: "20" + - name: DrbdOptions/Net/timeout + value: "100" controller: enabled: true podTemplate: