From 59b825ae4896e80dceb6b1dfd9ea8e8fb6d4d869 Mon Sep 17 00:00:00 2001
From: Stephen Taylor
Date: Wed, 3 Jun 2020 16:03:59 +0000
Subject: [PATCH] [ceph-osd, ceph-client] Weight OSDs as they are added

Currently, OSDs are added by the ceph-osd chart with zero weight and are
reweighted to their proper weights by the ceph-client chart after all
OSDs have been deployed. This causes a problem when a deployment is only
partially completed and additional OSDs are added later: the ceph-client
chart has already run, so the new OSDs never get weighted correctly.
This change instead weights OSDs properly as they are deployed.

As noted in the script, the noin flag may be set during the deployment
to prevent rebalancing as OSDs are added, if necessary.

This change also adds the ability to set and unset Ceph cluster flags in
the ceph-client chart.

Change-Id: Ic9a3d8d5625af49b093976a855dd66e5705d2c29
---
 ceph-client/templates/bin/pool/_init.sh.tpl   | 27 ++++++++++++-------
 ceph-client/templates/job-rbd-pool.yaml       |  4 +++
 ceph-client/values.yaml                       |  4 +++
 ceph-osd/templates/bin/osd/_directory.sh.tpl  |  3 ---
 .../bin/osd/ceph-volume/_block.sh.tpl         |  4 +--
 .../bin/osd/ceph-volume/_bluestore.sh.tpl     |  4 +--
 .../bin/osd/ceph-volume/_common.sh.tpl        | 21 +++++++++++++++
 7 files changed, 51 insertions(+), 16 deletions(-)

diff --git a/ceph-client/templates/bin/pool/_init.sh.tpl b/ceph-client/templates/bin/pool/_init.sh.tpl
index bf8c44c6..6ce3d23c 100644
--- a/ceph-client/templates/bin/pool/_init.sh.tpl
+++ b/ceph-client/templates/bin/pool/_init.sh.tpl
@@ -67,13 +67,6 @@ create_crushrule {{ .name }} {{ .crush_rule }} {{ .failure_domain }} {{ .device_
 {{- end }}
 {{- end }}
 
-function reweight_osds () {
-  for OSD_ID in $(ceph --cluster "${CLUSTER}" osd df | awk '$3 == "0" {print $1}'); do
-    OSD_WEIGHT=$(ceph --cluster "${CLUSTER}" osd df --format json-pretty| grep -A7 "\bosd.${OSD_ID}\b" | awk '/"kb"/{ gsub(",",""); d= $2/1073741824 ; r = sprintf("%.2f", d); print r }');
-    ceph --cluster "${CLUSTER}" osd crush reweight osd.${OSD_ID} ${OSD_WEIGHT};
-  done
-}
-
 function enable_autoscaling () {
   if [[ "${ENABLE_AUTOSCALER}" == "true" ]]; then
     ceph mgr module enable pg_autoscaler
@@ -81,6 +74,22 @@ function enable_autoscaling () {
   fi
 }
 
+function set_cluster_flags () {
+  if [[ ! -z "${CLUSTER_SET_FLAGS}" ]]; then
+    for flag in ${CLUSTER_SET_FLAGS}; do
+      ceph osd set ${flag}
+    done
+  fi
+}
+
+function unset_cluster_flags () {
+  if [[ ! -z "${CLUSTER_UNSET_FLAGS}" ]]; then
+    for flag in ${CLUSTER_UNSET_FLAGS}; do
+      ceph osd unset ${flag}
+    done
+  fi
+}
+
 function create_pool () {
   POOL_APPLICATION=$1
   POOL_NAME=$2
@@ -162,8 +171,6 @@ function manage_pool () {
   ceph --cluster "${CLUSTER}" osd pool set-quota "${POOL_NAME}" max_bytes $POOL_QUOTA
 }
 
-reweight_osds
-
 {{ $targetPGperOSD := .Values.conf.pool.target.pg_per_osd }}
 {{ $crushRuleDefault := .Values.conf.pool.default.crush_rule }}
 {{ $targetQuota := .Values.conf.pool.target.quota | default 100 }}
@@ -175,6 +182,8 @@ if [[ -z "$(ceph osd versions | grep ceph\ version | grep -v nautilus)" ]]; then
 else
   cluster_capacity=$(ceph --cluster "${CLUSTER}" df | head -n3 | tail -n1 | awk '{print $1 substr($2, 1, 1)}' | numfmt --from=iec)
 fi
+set_cluster_flags
+unset_cluster_flags
 {{- range $pool := .Values.conf.pool.spec -}}
 {{- with $pool }}
 {{- if .crush_rule }}
diff --git a/ceph-client/templates/job-rbd-pool.yaml b/ceph-client/templates/job-rbd-pool.yaml
index 47c8bc94..351ef761 100644
--- a/ceph-client/templates/job-rbd-pool.yaml
+++ b/ceph-client/templates/job-rbd-pool.yaml
@@ -52,6 +52,10 @@ spec:
               value: "ceph"
             - name: ENABLE_AUTOSCALER
               value: {{ .Values.conf.features.pg_autoscaler | quote }}
+            - name: CLUSTER_SET_FLAGS
+              value: {{ .Values.conf.features.cluster_flags.set | quote }}
+            - name: CLUSTER_UNSET_FLAGS
+              value: {{ .Values.conf.features.cluster_flags.unset | quote }}
           command:
             - /tmp/pool-init.sh
           volumeMounts:
diff --git a/ceph-client/values.yaml b/ceph-client/values.yaml
index f78e28f7..a94df4d8 100644
--- a/ceph-client/values.yaml
+++ b/ceph-client/values.yaml
@@ -255,6 +255,10 @@ conf:
     mds: true
     mgr: true
     pg_autoscaler: true
+    cluster_flags:
+      # List of flags to set or unset separated by spaces
+      set: ""
+      unset: ""
   pool:
     # NOTE(portdirect): this drives a simple approximation of
     # https://ceph.com/pgcalc/, the `target.osd` key should be set to match the
diff --git a/ceph-osd/templates/bin/osd/_directory.sh.tpl b/ceph-osd/templates/bin/osd/_directory.sh.tpl
index 69d8a317..18385d1f 100644
--- a/ceph-osd/templates/bin/osd/_directory.sh.tpl
+++ b/ceph-osd/templates/bin/osd/_directory.sh.tpl
@@ -64,9 +64,6 @@ if [[ -n "$(find /var/lib/ceph/osd -type d -empty ! -name "lost+found")" ]]; th
       # init data directory
       ceph-osd -i ${OSD_ID} --mkfs --osd-uuid ${UUID} --mkjournal --osd-journal ${OSD_JOURNAL} --setuser ceph --setgroup ceph
       # add the osd to the crush map
-      # NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
-      OSD_WEIGHT=0
-      # NOTE(supamatt): add or move the OSD's CRUSH location
       crush_location
     fi
 
diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_block.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_block.sh.tpl
index 68e150ef..7ccb8e1f 100644
--- a/ceph-osd/templates/bin/osd/ceph-volume/_block.sh.tpl
+++ b/ceph-osd/templates/bin/osd/ceph-volume/_block.sh.tpl
@@ -83,8 +83,8 @@ else
     --no-systemd ${OSD_ID} ${OSD_FSID}
 fi
 
-# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
-OSD_WEIGHT=0
+# NOTE(stevetaylor): Set the OSD's crush weight (use noin flag to prevent rebalancing if necessary)
+OSD_WEIGHT=$(get_osd_crush_weight_from_device ${OSD_DEVICE})
 # NOTE(supamatt): add or move the OSD's CRUSH location
 crush_location
 
diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_bluestore.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_bluestore.sh.tpl
index 80a16bbe..a3110ac5 100644
--- a/ceph-osd/templates/bin/osd/ceph-volume/_bluestore.sh.tpl
+++ b/ceph-osd/templates/bin/osd/ceph-volume/_bluestore.sh.tpl
@@ -89,8 +89,8 @@ else
   fi
 fi
 
-# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
-OSD_WEIGHT=0
+# NOTE(stevetaylor): Set the OSD's crush weight (use noin flag to prevent rebalancing if necessary)
+OSD_WEIGHT=$(get_osd_crush_weight_from_device ${OSD_DEVICE})
 # NOTE(supamatt): add or move the OSD's CRUSH location
 crush_location
 
diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl
index 39adc1bd..2a839461 100644
--- a/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl
+++ b/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl
@@ -289,6 +289,27 @@ function get_lvm_tag_from_device {
   get_lvm_tag_from_volume ${logical_volume} ${tag}
 }
 
+# Helper function to get the size of a logical volume
+function get_lv_size_from_device {
+  device="$1"
+  logical_volume="$(get_lv_from_device ${device})"
+
+  lvs ${logical_volume} -o LV_SIZE --noheadings --units k --nosuffix | xargs | cut -d'.' -f1
+}
+
+# Helper function to get the crush weight for an osd device
+function get_osd_crush_weight_from_device {
+  device="$1"
+  lv_size="$(get_lv_size_from_device ${device})" # KiB
+
+  if [[ ! -z "${BLOCK_DB_SIZE}" ]]; then
+    db_size=$(echo "${BLOCK_DB_SIZE}" | cut -d'B' -f1 | numfmt --from=iec | awk '{print $1/1024}') # KiB
+    lv_size=$((lv_size+db_size)) # KiB
+  fi
+
+  echo ${lv_size} | awk '{printf("%.2f\n", $1/1073741824)}' # KiB to TiB
+}
+
 # Helper function to get a cluster FSID from a physical device
 function get_cluster_fsid_from_device {
   device="$1"