diff --git a/dashboards/storage/linstor.json b/dashboards/storage/linstor.json new file mode 100644 index 00000000..be81abb8 --- /dev/null +++ b/dashboards/storage/linstor.json @@ -0,0 +1,2193 @@ +{ + "__inputs": [], + "__requires": [], + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 33, + "panels": [], + "title": "LINSTOR", + "type": "row" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 18, + "x": 0, + "y": 1 + }, + "id": 41, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "((sum by (storage_pool)(linstor_storage_pool_capacity_total_bytes{node=~\"$node\"} != 0)-sum by (storage_pool)(linstor_storage_pool_capacity_free_bytes{node=~\"$node\"}))*100/sum by (storage_pool)(linstor_storage_pool_capacity_total_bytes{node=~\"$node\"}))", + "hide": false, + "legendFormat": "{{ storage_pool }}", + "refId": "A" + } + ], + "title": "Space Usage", + "type": "gauge" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 1 + }, + 
"id": 43, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "sum by (storage_pool) (linstor_storage_pool_capacity_total_bytes{node=~\"$node\"} != 0)", + "hide": false, + "legendFormat": "{{ storage_pool }}", + "refId": "A" + } + ], + "title": "Total", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 5 + }, + "id": 44, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "sum by (storage_pool) (linstor_storage_pool_capacity_free_bytes{node=~\"$node\"} != 0)", + "hide": false, + "legendFormat": "{{ storage_pool }}", + "refId": "A" + } + ], + "title": "Free", + "type": "stat" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 9 + }, + "id": 42, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": 
"", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "sum by (storage_pool) ((linstor_storage_pool_capacity_total_bytes{node=~\"$node\"} != 0) - (linstor_storage_pool_capacity_free_bytes{node=~\"$node\"} != 0))", + "hide": false, + "legendFormat": "{{ storage_pool }}", + "refId": "A" + } + ], + "title": "Used", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 0, + "y": 11 + }, + "id": 36, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "count(linstor_node_state{nodetype=\"SATELLITE\",node=~\"$node\"} == 2) OR on() vector(0)", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Nodes", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 3, + "y": 11 + }, + "id": 38, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": 
"count(linstor_storage_pool_capacity_total_bytes{driver!=\"DISKLESS\",node=~\"$node\"}) OR on() vector(0)", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Storage Pools", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 6, + "y": 11 + }, + "id": 35, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "sum(linstor_resource_definition_count{}) OR on() vector(0)", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Resource Definitions", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 9, + "y": 11 + }, + "id": 37, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "count(linstor_resource_state{node=~\"$node\"}) OR on() vector(0)", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Resources", + "type": "stat" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 57, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 11 + }, + "id": 40, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "avg_over_time(linstor_scrape_duration_seconds[$__rate_interval])", + "legendFormat": "allocated", + "refId": "A" + } + ], + "title": "Scrape Duration", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "dark-yellow", + "value": 6 + }, + { + "color": "orange", + "value": 10 + }, + { + "color": "red", + "value": 20 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 13 + }, + "id": 52, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "sum by (module) 
(round(increase(linstor_error_reports_count{module!=\"\", node=~\"$node\"}[$__rate_interval])))", + "instant": false, + "legendFormat": "{{ module }}", + "refId": "A" + } + ], + "title": "New Error Reports", + "transformations": [], + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 14 + }, + "id": 51, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "count(linstor_node_state{nodetype=\"SATELLITE\", node=~\"$node\"} != 2) OR on() vector(0)", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Offline Nodes", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 14 + }, + "id": 50, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "count(linstor_storage_pool_error_count{driver!=\"DISKLESS\", node=~\"$node\"} != 0) OR on() vector(0)", 
+ "legendFormat": "", + "refId": "A" + } + ], + "title": "Failed Storage Pools", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 14 + }, + "id": 49, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "count(((linstor_volume_state{node=~\"$node\"} != 1) != 4) != -1) OR on() vector(0)", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Failed Resources", + "type": "stat" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 46, + "panels": [], + "title": "Statistics", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "unit": "binBps" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 18 + }, + "hiddenSeries": false, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": false + }, + "percentage": false, + "pluginVersion": "8.2.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + 
"exemplar": true, + "expr": "rate(drbd_device_written_bytes_total{node=~\"$node\"}[$__rate_interval]) and topk(5, avg_over_time(drbd_device_written_bytes_total{node=~\"$node\"}[$__rate_interval]) > 0)", + "instant": false, + "legendFormat": "{{name}} on {{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Write Rate (5 Most Active Volumes)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:104", + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:105", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "unit": "binBps" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 18 + }, + "hiddenSeries": false, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": false + }, + "percentage": false, + "pluginVersion": "8.2.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "exemplar": true, + "expr": "rate(drbd_device_read_bytes_total{node=~\"$node\"}[$__rate_interval]) and topk(5, avg_over_time(drbd_device_read_bytes_total{node=~\"$node\"}[$__rate_interval]) > 0)", + "instant": false, + 
"legendFormat": "{{name}} on {{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Read Rate (5 Most Active Volumes)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:254", + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:255", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "unit": "binBps" + }, + "overrides": [] + }, + "fill": 5, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 26 + }, + "hiddenSeries": false, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": false + }, + "percentage": false, + "pluginVersion": "8.2.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": true, + "targets": [ + { + "exemplar": true, + "expr": "sum by (node) (rate(drbd_device_written_bytes_total{node=~\"$node\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "{{ node }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Write Rate by Node", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": 
"time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:410", + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:411", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "unit": "binBps" + }, + "overrides": [] + }, + "fill": 5, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 26 + }, + "hiddenSeries": false, + "id": 48, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": false + }, + "percentage": false, + "pluginVersion": "8.2.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": true, + "targets": [ + { + "exemplar": true, + "expr": "sum by (node) (rate(drbd_device_read_bytes_total{node=~\"$node\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "{{ node }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Read Rate by Node", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:332", + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:333", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true 
+ } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": "${datasource}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 8, + "panels": [], + "title": "DRBD", + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 16, + "x": 0, + "y": 35 + }, + "id": 6, + "links": [], + "maxDataPoints": 100, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "$$hashKey": "object:470", + "aggregation": "Last", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", + "exemplar": true, + "expr": "drbd_resource_resources{node=~\"$node\"}", + "legendFormat": "{{node}}", + "refId": "A", + "units": "none", + "valueHandler": "Number Threshold" + } + ], + "title": "Number of DRBD Resources", + "type": "gauge" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 35 + }, + "hiddenSeries": false, + "id": 27, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": false + }, + "percentage": false, + "pluginVersion": "8.2.6", + "pointradius": 2, + "points": 
false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "exemplar": true, + "expr": "avg_over_time(scrape_duration_seconds{job=\"linstor-node\", node=~\"$node\"}[$__rate_interval])", + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Scrape Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:158", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:159", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "${datasource}", + "description": "DRBD data out of sync with a peer", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 43 + }, + "id": 12, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "sum(max by(name, volume) (drbd_peerdevice_outofsync_bytes{node=~\"$node\"} > 0)) OR on() vector(0)", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "out-of-sync data", + "type": "stat" + }, + { + 
"datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 46 + }, + "id": 14, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": false, + "expr": "count(count by (name) (drbd_connection_state{drbd_connection_state!=\"UpToDate\", drbd_connection_state!=\"Connected\", node=~\"$node\"} == 1)) OR on() vector(0)", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "disconnected", + "type": "stat" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "index": 0, + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 46 + }, + "id": 20, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "count(count by (name) (drbd_device_quorum{node=~\"$node\"} == 0)) OR on() vector(0)", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": 
"", + "refId": "A" + }, + { + "exemplar": true, + "expr": "count(drbd_device_quorum == 0)", + "hide": true, + "legendFormat": "", + "refId": "B" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "without quorum", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 46 + }, + "id": 4, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "count(drbd_device_unintentionaldiskless{node=~\"$node\"} == 1) OR on() vector(0)", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "storage failure", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "DRBD data out of sync with a peer", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 46 + }, + "id": 29, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "count(count by (name) 
(drbd_peerdevice_outofsync_bytes{node=~\"$node\"} > 0)) OR on() vector(0)", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "out-of-sync", + "type": "stat" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": null, + "displayMode": "auto", + "filterable": false + }, + "mappings": [ + { + "options": { + "1": { + "text": "No" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 22, + "maxDataPoints": null, + "options": { + "frameIndex": 0, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "drbd_connection_state" + } + ] + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "drbd_connection_state{drbd_connection_state!=\"UpToDate\", drbd_connection_state!=\"Connected\", node=~\"$node\"} == 1", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Disconnected DRBD Resources", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "instance": true, + "job": true, + "name": false, + "peer_node_id": true, + "tier": true + }, + "indexByName": { + "Time": 1, + "Value": 10, + "__name__": 2, + "conn_name": 5, + "drbd_connection_state": 4, + "instance": 6, + "job": 7, + "name": 0, + "node": 3, + "peer_node_id": 8, + "tier": 9 + }, + "renameByName": { + "Value #A": "Quorum?", + "conn_name": "Remote Node", + "drbd_connection_state": "State", + "instance": "Instance", + "job": "", + "name": "DRBD Resource", + "node": "Node", + 
"peer_node_id": "" + } + } + } + ], + "type": "table" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": null, + "displayMode": "auto", + "filterable": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "DRBD Resource" + }, + "properties": [ + { + "id": "custom.width", + "value": 279 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Host" + }, + "properties": [ + { + "id": "custom.width", + "value": 238 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Out of Sync" + }, + "properties": [ + { + "id": "custom.width", + "value": 93 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 16, + "options": { + "frameIndex": 0, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": false, + "expr": "sum(drbd_peerdevice_outofsync_bytes{node=~\"$node\"}) by(name, node) > 0", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "title": "DRBD Resources Out of Sync", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "Time": 0, + "Value": 3, + "name": 1, + "node": 2 + }, + "renameByName": { + "Time": "", + "Value": "Out of Sync", + "instance": "Host", + "name": "DRBD Resource", + "node": "Node" + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "desc": true, + "field": "Out of Sync" + } + ] + } + } + ], + "type": "table" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": null, + "displayMode": "auto", + "filterable": false + }, + "mappings": [ + { + "options": 
{ + "1": { + "text": "No" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 57 + }, + "id": 31, + "maxDataPoints": null, + "options": { + "frameIndex": 0, + "showHeader": true + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "count(drbd_device_quorum{node=~\"$node\"} == 0) by(node, name)", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "DRBD Resources Without Quorum", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true + }, + "indexByName": {}, + "renameByName": { + "Value #A": "Quorum?", + "instance": "Instance", + "name": "DRBD Resource", + "node": "Node" + } + } + } + ], + "type": "table" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": null, + "displayMode": "auto", + "filterable": false + }, + "mappings": [ + { + "options": { + "1": { + "text": "No" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 57 + }, + "id": 30, + "maxDataPoints": null, + "options": { + "frameIndex": 0, + "showHeader": true + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "exemplar": true, + "expr": "count(drbd_device_unintentionaldiskless{node=~\"$node\"} == 1) by(node, name, minor)", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift":
null, + "title": "DRBD Resources with Storage Failure", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true + }, + "indexByName": { + "Time": 0, + "Value": 4, + "minor": 3, + "name": 1, + "node": 2 + }, + "renameByName": { + "Value": "", + "Value #A": "Storage Healthy?", + "instance": "Instance", + "minor": "Minor", + "name": "DRBD Resource", + "node": "Node" + } + } + } + ], + "type": "table" + } + ], + "refresh": "30s", + "schemaVersion": 32, + "style": "dark", + "tags": [ + "storage", + "piraeus" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "${datasource}", + "definition": "", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "node", + "options": [], + "query": "label_values(drbdreactor_up, node)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Piraeus Datastore", + "uid": "f_tZtVlMz", + "version": 0 +} diff --git a/packages/extra/monitoring/dashboards.list b/packages/extra/monitoring/dashboards.list index e0247db7..dec05cf5 100644 --- a/packages/extra/monitoring/dashboards.list +++ b/packages/extra/monitoring/dashboards.list @@ -37,3 +37,4 @@ flux/flux-stats kafka/strimzi-kafka goldpinger/goldpinger clickhouse/altinity-clickhouse-operator-dashboard +storage/linstor diff --git a/packages/system/linstor/templates/podscrape.yaml b/packages/system/linstor/templates/podscrape.yaml new
file mode 100644 index 00000000..91b2de49 --- /dev/null +++ b/packages/system/linstor/templates/podscrape.yaml @@ -0,0 +1,44 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMPodScrape +metadata: + name: linstor-satellite + namespace: cozy-linstor +spec: + podMetricsEndpoints: + - port: prometheus + scheme: http + relabelConfigs: + - action: labeldrop + regex: (endpoint|namespace|pod|container) + - replacement: linstor-satellite + targetLabel: job + - sourceLabels: [__meta_kubernetes_pod_node_name] + targetLabel: node + - targetLabel: tier + replacement: cluster + selector: + matchLabels: + app.kubernetes.io/component: linstor-satellite +--- +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMPodScrape +metadata: + name: linstor-controller + namespace: cozy-linstor +spec: + podMetricsEndpoints: + - path: /metrics + port: api + scheme: http + relabelConfigs: + - action: labeldrop + regex: (endpoint|namespace|pod|container) + - replacement: linstor-controller + targetLabel: job + - sourceLabels: [__meta_kubernetes_pod_node_name] + targetLabel: node + - targetLabel: tier + replacement: cluster + selector: + matchLabels: + app.kubernetes.io/component: linstor-controller diff --git a/packages/system/piraeus-operator/alerts/piraeus-datastore.yaml b/packages/system/piraeus-operator/alerts/piraeus-datastore.yaml new file mode 100644 index 00000000..77512847 --- /dev/null +++ b/packages/system/piraeus-operator/alerts/piraeus-datastore.yaml @@ -0,0 +1,116 @@ +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: piraeus-datastore +spec: + groups: + - name: linstor.rules + rules: + - alert: linstorControllerOffline + annotations: + description: | + LINSTOR Controller is not reachable. + expr: up{job="linstor-controller"} == 0 + labels: + severity: critical + - alert: linstorSatelliteErrorRate + annotations: + description: | + LINSTOR Satellite "{{ $labels.name }}" reports {{ $value }} errors in the last 15 minutes.
+ Use "linstor error-reports list --nodes {{ $labels.name }} --since 15minutes" to see them. + expr: increase(linstor_error_reports_count{module="SATELLITE"}[15m]) > 0 + labels: + severity: warning + - alert: linstorControllerErrorRate + annotations: + description: | + LINSTOR Controller reports {{ $value }} errors in the last 15 minutes. + Use "linstor error-reports list --since 15minutes" to see them. + expr: increase(linstor_error_reports_count{module="CONTROLLER"}[15m]) > 0 + labels: + severity: warning + - alert: linstorSatelliteNotOnline + annotations: + description: | + LINSTOR Satellite "{{ $labels.name }}" is not ONLINE. + Check that the Satellite is running and reachable from the LINSTOR Controller. + expr: linstor_node_state{nodetype="SATELLITE"} != 2 + labels: + severity: critical + - alert: linstorStoragePoolErrors + annotations: + description: | + Storage pool "{{ $labels.storage_pool }}" on node "{{ $labels.node }}" ({{ $labels.driver }}={{ $labels.backing_pool }}) is reporting errors. + expr: linstor_storage_pool_error_count > 0 + labels: + severity: critical + - alert: linstorStoragePoolAtCapacity + annotations: + description: | + Storage pool "{{ $labels.storage_pool }}" on node "{{ $labels.node }}" ({{ $labels.driver }}={{ $labels.backing_pool }}) has less than 20% free space available. + expr: ( linstor_storage_pool_capacity_free_bytes / linstor_storage_pool_capacity_total_bytes ) < 0.20 + labels: + severity: warning + - name: drbd.rules + rules: + - alert: drbdReactorOffline + annotations: + description: | + DRBD Reactor on "{{ $labels.node }}" is not reachable. + expr: up{job="linstor-satellite"} == 0 + labels: + severity: critical + - alert: drbdConnectionNotConnected + annotations: + description: | + DRBD Resource "{{ $labels.name }}" on "{{ $labels.node }}" is not connected to "{{ $labels.conn_name }}": {{ $labels.drbd_connection_state }}.
+ expr: drbd_connection_state{drbd_connection_state!="Connected"} > 0 + labels: + severity: warning + - alert: drbdDeviceNotUpToDate + annotations: + description: | + DRBD device "{{ $labels.name }}" on "{{ $labels.node }}" has unexpected device state "{{ $labels.drbd_device_state }}". + expr: drbd_device_state{drbd_device_state!~"UpToDate|Diskless"} > 0 + labels: + severity: warning + - alert: drbdDeviceUnintentionalDiskless + annotations: + description: | + DRBD device "{{ $labels.name }}" on "{{ $labels.node }}" is unintentionally diskless. + This usually indicates IO errors reported on the backing device. Check the kernel log. + expr: drbd_device_unintentionaldiskless > 0 + labels: + severity: warning + - alert: drbdDeviceWithoutQuorum + annotations: + description: | + DRBD device "{{ $labels.name }}" on "{{ $labels.node }}" has no quorum. + This usually indicates connectivity issues. + expr: drbd_device_quorum == 0 + labels: + severity: warning + - alert: drbdResourceSuspended + annotations: + description: | + DRBD resource "{{ $labels.name }}" on "{{ $labels.node }}" has been suspended for 1m. + for: 1m + expr: drbd_resource_suspended > 0 + labels: + severity: warning + - alert: drbdResourceResyncWithoutProgress + annotations: + description: | + DRBD resource "{{ $labels.name }}" on "{{ $labels.node }}" has been in Inconsistent without resync progress for 5 minutes. + This may indicate there is no connection to UpToDate data, or a stuck resync. + expr: drbd_device_state{drbd_device_state="Inconsistent"} and delta(drbd_peerdevice_outofsync_bytes[5m]) >= 0 + labels: + severity: warning + - alert: drbdResourceWithNoUpToDateReplicas + annotations: + description: | + DRBD resource "{{ $labels.name }}" has no UpToDate replicas.
+ expr: sum by (name) (drbd_device_state{drbd_device_state="UpToDate"}) == 0 + labels: + severity: critical diff --git a/packages/system/piraeus-operator/templates/alerts.yaml b/packages/system/piraeus-operator/templates/alerts.yaml new file mode 100644 index 00000000..70d47014 --- /dev/null +++ b/packages/system/piraeus-operator/templates/alerts.yaml @@ -0,0 +1,7 @@ +{{- $files := .Files.Glob "alerts/*.yaml" -}} +{{- range $path, $file := $files }} +--- +# from: {{ $path }} +{{ toString $file }} + +{{- end -}}