{ "annotations": { "list": [ { "builtIn": 1, "datasource": { "type": "datasource", "uid": "grafana" }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "target": { "limit": 100, "matchAny": false, "tags": [], "type": "dashboard" }, "type": "dashboard" } ] }, "editable": false, "fiscalYearStartMonth": 0, "graphTooltip": 1, "iteration": 1684513475244, "links": [], "liveNow": false, "panels": [ { "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "description": "Note that this table shows the average values for the entire period selected in the dashboard. Consequently, it may contain information about Pods or namespaces that were changed or deleted during the selected period.", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": { "displayMode": "auto", "filterable": false, "inspect": false, "minWidth": 70 }, "decimals": 2, "displayName": "", "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "short" }, "overrides": [ { "matcher": { "id": "byName", "options": "controller" }, "properties": [ { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.align", "value": "auto" }, { "id": "displayName", "value": "Controller" }, { "id": "custom.width" }, { "id": "links", "value": [ { "targetBlank": true, "title": "Controller Details", "url": "/d/IRPuf4ymk1/controller?var-ds_prometheus=${ds_prometheus}&var-namespace=${namespace}&var-controller=${__value.raw}&${__url_time_range}" } ] }, { "id": "custom.minWidth", "value": 70 } ] }, { "matcher": { "id": "byName", "options": "Value #B" }, "properties": [ { "id": "displayName", "value": "VPA Mode" }, { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" }, { "id": "mappings", "value": [ { "options": { "0": { "text": "Off" }, "1": { "text": "Initial" }, "2": { "text": "Auto" }, "3": { 
"text": "Recreate" }, "5": { "text": "-" } }, "type": "value" } ] } ] }, { "matcher": { "id": "byName", "options": "Value #C" }, "properties": [ { "id": "displayName", "value": "CPU" }, { "id": "unit", "value": "short" }, { "id": "decimals", "value": 3 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #D" }, "properties": [ { "id": "displayName", "value": "Req CPU" }, { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #E" }, "properties": [ { "id": "displayName", "value": "VPA CPU" }, { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #F" }, "properties": [ { "id": "displayName", "value": "Over-req CPU" }, { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #G" }, "properties": [ { "id": "displayName", "value": "Under-req CPU" }, { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #H" }, "properties": [ { "id": "displayName", "value": "Throttling cores" }, { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #I" }, "properties": [ { "id": "displayName", "value": "Memory" }, { "id": "unit", "value": "bytes" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #J" }, "properties": [ { "id": "displayName", "value": "Req Memory" }, { "id": "unit", "value": "bytes" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #K" }, "properties": [ { "id": "displayName", "value": "VPA Memory" }, { "id": "unit", "value": "bytes" }, { "id": 
"decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #L" }, "properties": [ { "id": "displayName", "value": "Over-req Memory" }, { "id": "unit", "value": "bytes" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #M" }, "properties": [ { "id": "displayName", "value": "Under-req Memory" }, { "id": "unit", "value": "bytes" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #N" }, "properties": [ { "id": "displayName", "value": "RX Network" }, { "id": "unit", "value": "Bps" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" }, { "id": "mappings", "value": [ { "options": { "-1": { "index": 0, "text": "hostNet" } }, "type": "value" } ] } ] }, { "matcher": { "id": "byName", "options": "Value #O" }, "properties": [ { "id": "displayName", "value": "TX Network" }, { "id": "unit", "value": "Bps" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" }, { "id": "mappings", "value": [ { "options": { "-1": { "index": 0, "text": "hostNet" } }, "type": "value" } ] } ] }, { "matcher": { "id": "byName", "options": "Value #P" }, "properties": [ { "id": "displayName", "value": "Read IOPS" }, { "id": "unit", "value": "iops" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #Q" }, "properties": [ { "id": "displayName", "value": "Write IOPS" }, { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #R" }, "properties": [ { "id": "displayName", "value": "Total replicas count" }, { "id": "unit", "value": "short" }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #S" }, "properties": [ { "id": "displayName", "value": "Not ready replicas" }, { "id": "unit", "value": "short" }, { "id": "custom.align" } ] }, { 
"matcher": { "id": "byName", "options": "Value #T" }, "properties": [ { "id": "displayName", "value": "Pod Restarts" }, { "id": "unit", "value": "short" }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Controller" }, "properties": [ { "id": "custom.width", "value": 230 } ] } ] }, "gridPos": { "h": 8, "w": 24, "x": 0, "y": 0 }, "id": 4, "links": [], "maxPerRow": 6, "options": { "footer": { "fields": "", "reducer": [ "sum" ], "show": false }, "showHeader": true, "sortBy": [] }, "pluginVersion": "8.5.13", "repeatDirection": "h", "targets": [ { "expr": "max by (job, namespace, controller, controller_name) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]))", "format": "table", "hide": false, "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": "Controller", "refId": "A" }, { "expr": "sum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (controller_type, controller_name) group_left() sum by (controller_type, controller_name) (avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", update_mode=\"Off\"}[$__range]))) * 0\nor\nsum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (controller_type, controller_name) group_left() sum by (controller_type, controller_name) (avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", update_mode=\"Initial\"}[$__range]))) * 0 + 1\nor\nsum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (controller_type, controller_name) group_left() sum by (controller_type, controller_name) 
(avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", update_mode=\"Auto\"}[$__range]))) * 0 + 2\nor\nsum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (controller_type, controller_name) group_left() sum by (controller_type, controller_name) (avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", update_mode=\"Recreate\"}[$__range]))) * 0 + 3\nor\nsum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) * 0 + 5", "format": "table", "hide": false, "instant": true, "intervalFactor": 1, "legendFormat": "VPA Mode", "refId": "B" }, { "expr": "sum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (pod) group_left() sum by (pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__range])))\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "CPU", "refId": "C" }, { "expr": "sum by (controller)\n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])\n * on (pod) group_left()\n sum by (pod) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\"}[$__range]))\n )\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) 
* 0", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "Req CPU", "refId": "D" }, { "expr": "sum by (controller)\n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])\n * on (controller_type, controller_name) group_left()\n sum by(controller_type, controller_name) (avg_over_time(vpa_target_recommendation{container!=\"POD\",namespace=\"$namespace\", resource=\"cpu\"}[$__range]))\n ) \nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "VPA CPU", "refId": "E" }, { "expr": "sum by (controller)\n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\"}[$__range]))\n -\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__range]))\n ) > 0\n )\n )\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "Over-req CPU", "refId": "F" }, { "expr": "sum by (controller)\n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n (\n sum 
by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\"}[$__range]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__range]))\n ) or sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__range]))\n ) > 0\n )\n )\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "Under-req CPU", "refId": "G" }, { "expr": "sum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (pod) group_left() ((sum by (pod) (rate(container_cpu_cfs_throttled_periods_total{node=~\"$node\", namespace=\"$namespace\"}[$__range])) / sum by (pod) (rate(container_cpu_cfs_periods_total{node=~\"$node\", namespace=\"$namespace\"}[$__range]))) * sum by (pod) (rate(kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\"}[$__range]))))\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "Throttling cores", "refId": "H" }, { "expr": "sum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (pod) group_left() sum by (pod) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", 
namespace=\"$namespace\", container!=\"POD\"}[$__range])))\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "Memory", "refId": "I" }, { "expr": "sum by (controller)\n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])\n * on (pod) group_left()\n sum by (namespace, pod)\n (\n avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__range])\n )\n )\n or\n count (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "Req Memory", "refId": "J" }, { "expr": "sum by (controller)\n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])\n * on (controller_type, controller_name) group_left()\n sum by(controller_type, controller_name) (avg_over_time(vpa_target_recommendation{container!=\"POD\",namespace=\"$namespace\", resource=\"memory\"}[$__range]))\n ) \n or \ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "VPA Memory", "refId": "K" }, { "expr": "sum by (controller)\n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])\n * on (namespace, pod) 
group_left()\n sum by (namespace, pod)\n (\n (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", namespace=\"$namespace\"}[$__range]))\n -\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__range]))\n ) > 0\n )\n )\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "Over-req Memory", "refId": "L" }, { "expr": "sum by (controller)\n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n (\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\"}[$__range]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__range]))\n ) or sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__range]))\n ) > 0\n )\n )\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "Under-req Memory", "refId": "M" }, { "expr": "sum by(controller) ( # Data rate of the controller is a sum of data rates from its Pods.\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", 
controller_type=~\"$controller_type\", controller=~\"$controller\"} \n * on(pod)\n group_left() # Do not drop controller label from kube_controller_pod.\n sum by(pod) # Use sum if there are multiple interfaces in the Pod.\n (\n rate(container_network_receive_bytes_total{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n * # Select only Pods with 'hostNetwork: false', because receive_bytes values are meaningful only for Pods with hostNetwork: false.\n on(pod)\n kube_pod_info{host_network=\"false\", namespace=\"$namespace\"}\n )\n)\nor # Return -1 value for Pods with 'hostNetwork: true' to rewrite by value mapping.\n# Use max to get one '-1' per controller\n(max by(controller) (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on(pod)\n group_left() # Do not drop controller label from kube_controller_pod.\n (kube_pod_info{host_network=\"true\", namespace=\"$namespace\"})\n) * -1)", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "RX Network", "refId": "N" }, { "expr": "sum by(controller) ( # Data rate of the controller is a sum of data rates from its Pods.\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} \n * on(pod)\n group_left() # Do not drop controller label from kube_controller_pod.\n sum by(pod) # Use sum if there are multiple interfaces in the Pod.\n (\n rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n * # Select only Pods with 'hostNetwork: false', because transmit_bytes values are meaningful only for Pods with hostNetwork: false.\n on(pod)\n kube_pod_info{host_network=\"false\", namespace=\"$namespace\"}\n )\n)\nor # Return -1 value for Pods with 'hostNetwork: true' to rewrite by value mapping.\n# Use max to get one '-1' per controller\n(max by(controller) (\n kube_controller_pod{node=~\"$node\", 
namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on(pod)\n group_left() # Do not drop controller label from kube_controller_pod.\n (kube_pod_info{host_network=\"true\", namespace=\"$namespace\"})\n) * -1)", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "TX Network", "refId": "O" }, { "expr": "sum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (pod) group_left() sum by (pod) (rate(container_fs_reads_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__range])))\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "Read IOPS", "refId": "P" }, { "expr": "sum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (pod) group_left() sum by (pod) (rate(container_fs_writes_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__range])))\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "Write IOPS", "refId": "Q" }, { "expr": "max by (controller) (max by (namespace, controller_type, controller_name, controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) * on (namespace, controller_type, controller_name) group_left() max by (namespace, controller_type, controller_name) 
(avg_over_time(kube_controller_replicas{node=~\"$node\", namespace=\"$namespace\"}[$__range])))\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "hide": false, "instant": true, "intervalFactor": 1, "legendFormat": "Total replicas count", "refId": "R" }, { "expr": "max by (job, namespace, controller_type, controller_name, controller) ((max by (job, namespace, controller_type, controller_name, controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]))) * on (job, namespace, controller_type, controller_name) group_right(controller)\n(avg_over_time(kube_controller_replicas[$__range]) - avg_over_time(kube_controller_replicas_ready[$__range])))\nor\nmax by (job, namespace, controller_type, controller_name, controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) * 0", "format": "table", "hide": false, "instant": true, "intervalFactor": 1, "legendFormat": "Not ready replicas", "refId": "S" }, { "expr": "sum by (controller) \n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])\n * on (pod) group_left()\n sum by (pod) (increase(kube_pod_container_status_restarts_total{node=~\"$node\", namespace=\"$namespace\"}[$__range]))\n )\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "hide": false, "instant": true, "intervalFactor": 1, "legendFormat": "Pod restart count", "refId": "T" }, { "expr": "sum by (controller) 
(avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (pod) group_left() sum by (pod) (avg_over_time(container_memory:kmem{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__range])))\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "Memory", "refId": "U" } ], "title": "Controllers", "transformations": [ { "id": "merge", "options": {} }, { "id": "filterFieldsByName", "options": { "include": { "names": [ "controller", "Value #B", "Value #C", "Value #D", "Value #E", "Value #F", "Value #G", "Value #H", "Value #I", "Value #J", "Value #K", "Value #L", "Value #M", "Value #N", "Value #O", "Value #P", "Value #Q", "Value #R", "Value #S", "Value #T" ] } } }, { "id": "sortBy", "options": { "fields": {}, "sort": [ { "field": "controller" } ] } } ], "type": "table" }, { "cards": { "cardHSpacing": 2, "cardMinWidth": 5, "cardVSpacing": 2 }, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", "colorScheme": "interpolateGnYlRd", "defaultColor": "#757575", "exponent": 0.5, "mode": "discrete", "thresholds": [ { "color": "#bf1b00", "tooltip": "100% not ready", "value": "0" }, { "color": "#508642", "tooltip": "100% ready", "value": "1" }, { "color": "#ea6460", "tooltip": "60-99% not ready", "value": "2" }, { "color": "#e5ac0e", "tooltip": "30-59% not ready", "value": "3" }, { "color": "#f4d598", "tooltip": "1-29% not ready", "value": "4" } ] }, "datasource": { "uid": "$ds_prometheus" }, "gridPos": { "h": 8, "w": 24, "x": 0, "y": 8 }, "hideBranding": false, "highlightCards": true, "id": 223, "legend": { "show": true }, "links": [], "nullPointMode": "as empty", "pageSize": 15, "seriesFilterIndex": -1, "statusmap": { "ConfigVersion": "v1" }, "targets": 
[ { "expr": "((max by (job, namespace, controller_type, controller_name, controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"})) * on (job, namespace, controller_type, controller_name) group_right(controller) \n(kube_controller_replicas_ready/kube_controller_replicas == 1)) * 1", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" }, { "expr": "((max by (job, namespace, controller_type, controller_name, controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"})) * on (job, namespace, controller_type, controller_name) group_right(controller) \n((kube_controller_replicas_ready/kube_controller_replicas > 0) and (kube_controller_replicas_ready/kube_controller_replicas < 0.3)) > bool 0) * 2", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "B" }, { "expr": "((max by (job, namespace, controller_type, controller_name, controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"})) * on (job, namespace, controller_type, controller_name) group_right(controller) \n((kube_controller_replicas_ready/kube_controller_replicas >= 0.3) and (kube_controller_replicas_ready/kube_controller_replicas < 0.6)) > bool 0) * 3", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "C" }, { "expr": "((max by (job, namespace, controller_type, controller_name, controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"})) * on (job, namespace, controller_type, controller_name) group_right(controller) \n((kube_controller_replicas_ready/kube_controller_replicas >= 0.6) and (kube_controller_replicas_ready/kube_controller_replicas < 1)) > 
bool 0) * 4", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "D" }, { "expr": "((max by (job, namespace, controller_type, controller_name, controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"})) * on (job, namespace, controller_type, controller_name) group_right(controller)\n(kube_controller_replicas_ready/kube_controller_replicas == 0)) * 5", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "E" } ], "title": "Status", "tooltip": { "extraInfo": "", "freezeOnClick": true, "items": [], "show": true, "showExtraInfo": false, "showItems": false }, "type": "flant-statusmap-panel", "useMax": true, "usingPagination": false, "xAxis": { "show": true }, "yAxis": { "maxWidth": -1, "minWidth": -1, "show": true }, "yAxisSort": "metrics", "yLabel": { "delimiter": "", "labelTemplate": "", "usingSplitLabel": false } }, { "aliasColors": {}, "bars": true, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "decimals": 0, "description": "The number of Pods controlled by the Controller", "fieldConfig": { "defaults": { "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 4, "w": 12, "x": 0, "y": 16 }, "hiddenSeries": false, "id": 794, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": false, "sort": "avg", "sortDesc": true, "total": false, "values": false }, "lines": false, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "Total", "bars": false, "stack": false } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by (controller) \n (\n 
kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (max_over_time(kube_pod_info{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" }, { "expr": "sum \n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (max_over_time(kube_pod_info{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", "refId": "B" } ], "thresholds": [], "timeRegions": [], "title": "Pods count", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "decimals": 0, "format": "short", "label": "", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": false } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": true, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "decimals": 0, "description": "The number of Pod restarts", "fieldConfig": { "defaults": { "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 4, "w": 12, "x": 12, "y": 16 }, "hiddenSeries": false, "id": 661, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": false, "sort": "avg", "sortDesc": true, "total": false, "values": false }, "lines": false, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "Total", "bars": 
false, "stack": false } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by (controller) \n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (increase(kube_pod_container_status_restarts_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" }, { "expr": "sum \n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (increase(kube_pod_container_status_restarts_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", "refId": "B" } ], "thresholds": [], "timeRegions": [], "title": "Pods restarts", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "decimals": 0, "format": "short", "label": "", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": false } ], "yaxis": { "align": false } }, { "collapsed": false, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 20 }, "id": 8, "panels": [], "title": "CPU", "type": "row" }, { "aliasColors": {}, "bars": true, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "fieldConfig": { "defaults": { "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 21 }, "hiddenSeries": false, "id": 6, "legend": { "alignAsTable": true, "avg": true, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, 
"lines": false, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "Total", "bars": false, "stack": false } ], "spaceLength": 10, "stack": true, "steppedLine": true, "targets": [ { "expr": "sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" }, { "expr": "sum (sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", "refId": "B" } ], "thresholds": [], "timeRegions": [], "title": "Usage by controller", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": "cores", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": true, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "The total value may not equal the sum of system and user times because of the kernel's cgroup accounting peculiarities. 
Read more here: https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt", "fieldConfig": { "defaults": { "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 21 }, "hiddenSeries": false, "id": 10, "legend": { "alignAsTable": true, "avg": true, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "Total", "bars": false, "stack": false }, { "alias": "System", "color": "#e24d42" }, { "alias": "User", "color": "#1f78c1" } ], "spaceLength": 10, "stack": true, "steppedLine": true, "targets": [ { "expr": "sum (sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_system_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))))", "format": "time_series", "intervalFactor": 1, "legendFormat": "System", "refId": "A" }, { "expr": "sum (sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_user_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))))", "format": "time_series", "intervalFactor": 1, "legendFormat": "User", "refId": "B" }, { "expr": "sum (sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", 
namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", "refId": "D" } ], "thresholds": [], "timeRegions": [], "title": "Usage by state", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": "cores", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": true, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "This graph shows the requested CPU resources higher than the actual CPU consumption. In other words, it shows CPU resources that can be \"freed\" without affecting the service.", "fieldConfig": { "defaults": { "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 30 }, "hiddenSeries": false, "id": 404, "legend": { "alignAsTable": true, "avg": true, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "Total", "bars": false, "stack": false } ], "spaceLength": 10, "stack": true, "steppedLine": true, "targets": [ { "expr": "sum by (controller)\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", 
namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" }, { "expr": "sum\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", "refId": "B" } ], "thresholds": [], "timeRegions": [], "title": "Over-requested by controller", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:149", "format": "short", "label": "cores", "logBase": 1, "min": "0", "show": true }, { "$$hashKey": "object:150", "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": true, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "This graph shows the requested CPU resources lower than the actual CPU consumption. 
In other words, it shows CPU resources that need to be \"reserved\" for the service to run smoothly.", "fieldConfig": { "defaults": { "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 30 }, "hiddenSeries": false, "id": 538, "legend": { "alignAsTable": true, "avg": true, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "Total", "bars": false, "stack": false } ], "spaceLength": 10, "stack": true, "steppedLine": true, "targets": [ { "expr": "sum by (controller)\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n (\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", "format": "time_series", "instant": false, "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" }, { "expr": "sum\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n (\n sum by(namespace, pod, container) 
(rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", "refId": "B" } ], "thresholds": [], "timeRegions": [], "title": "Under-requested by controller", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": "cores", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": true, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "The absence of data on the graph means that container resources are not set", "fieldConfig": { "defaults": { "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 24, "x": 0, "y": 39 }, "hiddenSeries": false, "id": 262, "legend": { "alignAsTable": true, "avg": true, "current": false, "max": true, "min": false, "rightSide": true, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "Total", "bars": false, "stack": false } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by (controller) (kube_controller_pod{node=~\"$node\", 
namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() ((sum by (pod) (rate(container_cpu_cfs_throttled_periods_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])) / sum by (pod) (rate(container_cpu_cfs_periods_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))) * sum by (pod) (kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\"})))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" }, { "expr": "sum (sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() ((sum by (pod) (rate(container_cpu_cfs_throttled_periods_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])) / sum by (pod) (rate(container_cpu_cfs_periods_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))) * sum by (pod) (kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\"}))))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", "refId": "B" } ], "thresholds": [], "timeRegions": [], "title": "Throttling", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": "cores", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": false } ], "yaxis": { "align": false } }, { "collapsed": true, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 46 }, "id": 343, "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "", "fill": 0, "gridPos": { "h": 9, "w": 6, "x": 0, "y": 47 }, "id": 341, "legend": { 
"alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": "controller", "repeatDirection": "h", "seriesOverrides": [ { "alias": "Usage", "color": "#629e51" }, { "alias": "Requests", "color": "#f4d598" }, { "alias": "Limits", "color": "#c15c17" }, { "alias": "VPA Target", "color": "#447ebc" } ], "spaceLength": 10, "stack": false, "steppedLine": true, "targets": [ { "expr": "sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Usage", "refId": "D" }, { "expr": "sum by (controller)\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod) group_left()\n sum by(pod) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", container!=\"POD\",namespace=\"$namespace\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "Requests", "refId": "C" }, { "expr": "sum by (controller)\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod) group_left()\n sum by(pod) (avg_over_time(kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",node=~\"$node\", container!=\"POD\",namespace=\"$namespace\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "Limits", "refId": "E" }, { "expr": "sum by (controller)\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (controller_type, 
controller_name) group_left()\n sum by(controller_type, controller_name) (avg_over_time(vpa_target_recommendation{container!=\"POD\",namespace=\"$namespace\", resource=\"cpu\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "VPA Target", "refId": "F" } ], "thresholds": [], "timeRegions": [], "title": "$controller", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": "cores", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } } ], "title": "Controllers CPU", "type": "row" }, { "collapsed": true, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 47 }, "id": 12, "panels": [ { "aliasColors": {}, "bars": true, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "The total value may not equal the sum of system and user times because of the kernel's cgroup accounting peculiarities. 
Read more here: https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt", "fill": 1, "gridPos": { "h": 9, "w": 6, "x": 0, "y": 48 }, "id": 14, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "total": false, "values": false }, "lines": false, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": "controller", "repeatDirection": "h", "seriesOverrides": [ { "alias": "Total", "bars": false, "stack": false }, { "alias": "System", "color": "#e24d42" }, { "alias": "User", "color": "#1f78c1" } ], "spaceLength": 10, "stack": true, "steppedLine": true, "targets": [ { "expr": "sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_system_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval])))", "format": "time_series", "interval": "", "intervalFactor": 1, "legendFormat": "System", "refId": "A" }, { "expr": "sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_user_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "User", "refId": "B" }, { "expr": "sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", "refId": "D" } ], "thresholds": [], "timeRegions": [], "title": "$controller", "tooltip": { "shared": true, "sort": 0, "value_type": 
"individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": "cores", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } } ], "title": "Controllers CPU by state", "type": "row" }, { "collapsed": false, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 48 }, "id": 38, "panels": [], "title": "Memory", "type": "row" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 49 }, "hiddenSeries": false, "id": 40, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sort": "avg", "sortDesc": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "Total", "lines": false, "stack": false } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "expr": "sum by (controller)\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "expr": "sum\n (\n kube_controller_pod{node=~\"$node\", 
namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n )", "format": "time_series", "interval": "", "intervalFactor": 1, "legendFormat": "Total", "refId": "B" } ], "thresholds": [], "timeRegions": [], "title": "Usage by controller", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:205", "format": "bytes", "logBase": 1, "min": "0", "show": true }, { "$$hashKey": "object:206", "format": "short", "logBase": 1, "show": false } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "The Working set bytes metric is the actual memory used by the container, as it includes active file memory. When its value approaches the limit, the container can be killed by the OOM killer. 
This value can be higher than the sum of RSS and Cache since not all active file memory is Cache.", "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 49 }, "hiddenSeries": false, "id": 41, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sort": "avg", "sortDesc": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "Working set bytes without kmem", "color": "rgb(0, 0, 0)", "dashes": true, "fill": 0, "linewidth": 2, "stack": false }, { "alias": "Kmem", "color": "rgb(255, 0, 0)", "dashes": true, "fill": 0, "linewidth": 2, "stack": false } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (avg_over_time(container_memory_rss{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "RSS", "refId": "A" }, { "expr": "sum \n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (avg_over_time(container_memory_cache{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n )", "format": "time_series", "interval": "", "intervalFactor": 1, "legendFormat": "Cache", "refId": "B" }, { "expr": "sum \n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod) group_left()\n sum by (pod) 
(avg_over_time(container_memory_swap{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n )", "format": "time_series", "interval": "", "intervalFactor": 1, "legendFormat": "Swap", "refId": "C" }, { "expr": "sum \n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "Working set bytes without kmem", "refId": "D" }, { "expr": "sum \n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (avg_over_time(container_memory:kmem{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "Kmem", "refId": "E" } ], "thresholds": [], "timeRegions": [], "title": "Usage by state", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": false } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "description": "This graph shows the requested Memory resources higher than the actual Memory consumption. 
In other words, it shows Memory resources that can be \"freed\" without affecting the service.", "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 58 }, "hiddenSeries": false, "id": 489, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sort": "avg", "sortDesc": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "Total", "lines": false, "stack": false } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "expr": "sum by (controller)\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "expr": "sum\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", 
namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", "refId": "B" } ], "thresholds": [], "timeRegions": [], "title": "Over-requested by controller", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:95", "format": "bytes", "logBase": 1, "min": "0", "show": true }, { "$$hashKey": "object:96", "format": "short", "logBase": 1, "show": false } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "This graph shows the actual Memory consumption that exceeds the requested Memory resources. In other words, it shows Memory resources that need to be \"reserved\" for the service to run smoothly.", "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 58 }, "hiddenSeries": false, "id": 575, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sort": "avg", "sortDesc": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "Total", "lines": false, "stack": false } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by (controller)\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by
(namespace, pod)\n (\n (\n (\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" }, { "expr": "sum\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n (\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", "refId": "B" }, { "expr": "sum by (controller)\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n (\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n 
-\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (avg_over_time(container_memory:kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "C" }, { "expr": "sum\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n (\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (avg_over_time(container_memory:kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", "refId": "D" } ], "thresholds": [], "timeRegions": [], "title": "Under-requested by controller", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": false } ], "yaxis": { "align": false } }, { "collapsed": true, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 66 }, "id": 43, "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" 
}, "description": "The Working set bytes metric is the actual memory used by the container, as it includes active file memory. When its value approaches the limit, the container can be killed by the OOM killer. This value can be higher than the sum of RSS and Cache since not all active file memory is Cache.", "fill": 1, "gridPos": { "h": 9, "w": 6, "x": 0, "y": 67 }, "id": 44, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": "controller", "repeatDirection": "h", "seriesOverrides": [ { "alias": "Working set bytes without kmem", "color": "rgb(0, 0, 0)", "dashes": true, "fill": 0, "linewidth": 2, "stack": false }, { "alias": "Kmem", "color": "rgb(255, 0, 0)", "dashes": true, "fill": 0, "linewidth": 2, "stack": false }, { "alias": "VPA Target", "color": "#447ebc", "dashes": true, "fill": 0, "lines": false, "linewidth": 2, "stack": false }, { "alias": "Requests", "color": "#f4d598", "dashes": true, "fill": 0, "lines": false, "linewidth": 2, "stack": false }, { "alias": "Limits", "color": "#c15c17", "dashes": true, "fill": 0, "lines": false, "linewidth": 2, "stack": false } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum \n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod) group_left() \n sum by (pod) (avg_over_time(container_memory_rss{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "RSS", "refId": "A" }, { "expr": "sum\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"} \n * on (pod) group_left() \n sum by (pod)
(avg_over_time(container_memory_cache{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n )", "format": "time_series", "interval": "", "intervalFactor": 1, "legendFormat": "Cache", "refId": "B" }, { "expr": "sum \n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod) group_left() \n sum by (pod) (avg_over_time(container_memory_swap{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n )", "format": "time_series", "interval": "", "intervalFactor": 1, "legendFormat": "Swap", "refId": "C" }, { "expr": "sum \n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "Working set bytes without kmem", "refId": "D" }, { "expr": "sum \n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod) group_left()\n sum by(pod) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", container!=\"POD\",namespace=\"$namespace\"}[$__rate_interval]))\n ) ", "format": "time_series", "intervalFactor": 1, "legendFormat": "Requests", "refId": "E" }, { "expr": "sum\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"} \n * on (pod) group_left() \n sum by(pod) (avg_over_time(kube_pod_container_resource_limits{resource=\"memory\",unit=\"byte\",node=~\"$node\", container!=\"POD\",namespace=\"$namespace\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "Limits", "refId": "F" }, { "expr": "sum \n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on 
(controller_type, controller_name) group_left()\n sum by(controller_type, controller_name) (avg_over_time(vpa_target_recommendation{container!=\"POD\",namespace=\"$namespace\", resource=\"memory\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "VPA Target", "refId": "G" }, { "expr": "sum \n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (avg_over_time(container_memory:kmem{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "Kmem", "refId": "H" } ], "thresholds": [], "timeRegions": [], "title": "$controller", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": false } ], "yaxis": { "align": false } } ], "title": "Controllers Memory", "type": "row" }, { "collapsed": false, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 67 }, "id": 77, "panels": [], "title": "Network", "type": "row" }, { "aliasColors": {}, "bars": true, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "This graph shows Network Receive (except for the hostNetwork Pods)", "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 68 }, "hiddenSeries": false, "id": 79, "legend": { "alignAsTable": true, "avg": true, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 2, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.5.13", "pointradius": 5, "points": false, 
"renderer": "flot", "seriesOverrides": [ { "alias": "Total", "bars": false, "stack": false } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "# Data rate for the controller is a sum of data rates of its Pods.\nsum by (controller) (\n # Select Pods by controller_type and controller.\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} \n * on (pod)\n group_left() # Preserve controller label from the 'kube_controller_pod' metric.\n (\n # Select Pods with hostNetwork: false.\n kube_pod_info{host_network=\"false\",namespace=\"$namespace\"}\n * on(pod)\n # Sum data rate for all interfaces in the Pod. \n sum by (pod) (rate(container_network_receive_bytes_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n )\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" }, { "expr": "# Total is a sum of data rates of all Pods in selected containers.\nsum (\n # Select Pods by controller_type and controller.\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} \n * on (pod)\n # Select Pods with hostNetwork: false.\n kube_pod_info{host_network=\"false\",namespace=\"$namespace\"}\n * on(pod)\n # Sum data rate for all interfaces in the Pod. 
\n sum by (pod) (rate(container_network_receive_bytes_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", "refId": "B" } ], "thresholds": [], "timeRegions": [], "title": "Receive", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": true, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "This graph shows Network Transmit (except for the hostNetwork Pods)", "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 68 }, "hiddenSeries": false, "id": 240, "legend": { "alignAsTable": true, "avg": true, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 2, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "Total", "bars": false, "stack": false } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "# Data rate for the controller is a sum of data rates of its Pods.\nsum by (controller) (\n # Select Pods by controller_type and controller.\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} \n * on (pod)\n group_left() # Preserve controller label from the 'kube_controller_pod' metric.\n (\n # Select Pods with hostNetwork: false.\n kube_pod_info{host_network=\"false\",namespace=\"$namespace\"}\n * on(pod)\n # Sum data rate for all interfaces in the Pod. 
\n sum by (pod) (rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n )\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" }, { "expr": "# Total is a sum of data rates of all Pods in selected containers.\nsum (\n # Select Pods by controller_type and controller.\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} \n * on (pod)\n # Select Pods with hostNetwork: false.\n kube_pod_info{host_network=\"false\",namespace=\"$namespace\"}\n * on(pod)\n # Sum data rate for all interfaces in the Pod. \n sum by (pod) (rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", "refId": "B" } ], "thresholds": [], "timeRegions": [], "title": "Transmit", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "collapsed": false, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 76 }, "id": 154, "panels": [], "title": "IOPS", "type": "row" }, { "aliasColors": {}, "bars": true, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 77 }, "hiddenSeries": false, "id": 156, "legend": { "alignAsTable": true, "avg": true, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, 
"percentage": false, "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "Total", "bars": false, "stack": false } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_fs_reads_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" }, { "expr": "sum (sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_fs_reads_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", "refId": "B" } ], "thresholds": [], "timeRegions": [], "title": "Read", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "iops", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": false } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": true, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 77 }, "hiddenSeries": false, "id": 241, "legend": { "alignAsTable": true, "avg": true, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": false, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, 
"pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "Total", "bars": false, "stack": false } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_fs_writes_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" }, { "expr": "sum (sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_fs_writes_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", "refId": "B" } ], "thresholds": [], "timeRegions": [], "title": "Write", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "iops", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": false } ], "yaxis": { "align": false } }, { "collapsed": false, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 84 }, "id": 595, "panels": [], "title": "PVC", "type": "row" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 85 }, "hiddenSeries": false, "id": 598, "legend": { 
"alignAsTable": true, "avg": true, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.2.6", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "/Total/", "fill": 0, "linewidth": 2, "stack": false } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by (namespace, controller) (\n max by (namespace, controller, persistentvolumeclaim)\n (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n )\n\n * on (namespace, persistentvolumeclaim) group_left(pod) \n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"} \n )\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Provisioned {{ controller }}", "refId": "A" }, { "expr": "sum (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{namespace=\"$namespace\"}[$__rate_interval])\n ) \n * \n sum by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n 
\"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n\n )\n unless\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n) \nand (count (kube_node_info) == count (kube_node_info{node=~\"$node\"}))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Unused PVC", "refId": "B" }, { "expr": "sum (\n sum by (namespace, controller) (\n max by (namespace, controller, persistentvolumeclaim)\n (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n\n )\n\n * on (namespace, persistentvolumeclaim) group_left(pod) \n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"} \n )\n )\n)\n+\n(sum (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{namespace=\"$namespace\"}[$__rate_interval])\n ) \n * \n sum by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 
0)\n\n )\n unless\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n) * (count (kube_node_info) == bool count (kube_node_info{node=~\"$node\"})))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", "refId": "C" } ], "thresholds": [], "timeRegions": [], "title": "PVC Provisioned (except local storage classes)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": false } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 85 }, "hiddenSeries": false, "id": 596, "legend": { "alignAsTable": true, "avg": true, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.2.6", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "/Total/", "fill": 0, "linewidth": 2, "stack": false } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by (namespace, controller) (\n max by (namespace, controller, persistentvolumeclaim)\n (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]) - 
max_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n )\n\n * on (namespace, persistentvolumeclaim) group_left(pod) \n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"} \n )\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Usage {{ controller }}", "refId": "A" }, { "expr": "sum(\n sum by (namespace, controller) (\n max by (namespace, controller, persistentvolumeclaim)\n (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]) - max_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n )\n\n * on (namespace, persistentvolumeclaim) group_left(pod) \n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", 
controller=~\"$controller\"} \n )\n )\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", "refId": "B" } ], "thresholds": [], "timeRegions": [], "title": "PVC Usage (except local storage classes)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": false } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 24, "x": 0, "y": 92 }, "hiddenSeries": false, "id": 597, "legend": { "alignAsTable": true, "avg": true, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.2.6", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "/Total .*/", "fill": 0, "linewidth": 2 } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by (namespace, controller) (\n max by (namespace, controller, persistentvolumeclaim)\n (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]) - max_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * max by (namespace, persistentvolumeclaim) (\n label_replace(\n 
max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n )\n\n * on (namespace, persistentvolumeclaim) group_left(pod) \n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"} \n )\n)\n/\nsum by (namespace, controller) (\n max by (namespace, controller, persistentvolumeclaim)\n (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n )\n\n * on (namespace, persistentvolumeclaim) group_left(pod) \n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"} \n )\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Usage {{ controller }}", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "PVC Usage in % (except local storage classes)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", "logBase": 
1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": false } ], "yaxis": { "align": false } }, { "collapsed": true, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 99 }, "id": 602, "panels": [ { "datasource": { "uid": "$ds_prometheus" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": { "displayMode": "auto" }, "decimals": 2, "displayName": "", "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "short" }, "overrides": [ { "matcher": { "id": "byName", "options": "Time" }, "properties": [ { "id": "displayName", "value": "Time" }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "persistentvolumeclaim" }, "properties": [ { "id": "displayName", "value": "Name" }, { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "storageclass" }, "properties": [ { "id": "displayName", "value": "StorageClass" }, { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #B" }, "properties": [ { "id": "displayName", "value": "Requested" }, { "id": "unit", "value": "bytes" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #C" }, "properties": [ { "id": "displayName", "value": "Provisioned" }, { "id": "unit", "value": "bytes" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #D" }, "properties": [ { "id": "displayName", "value": "Capacity" }, { "id": "unit", "value": "bytes" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #E" }, "properties": [ { "id": "displayName", "value": "Used bytes" }, { "id": "unit", 
"value": "bytes" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #F" }, "properties": [ { "id": "displayName", "value": "Used inodes" }, { "id": "unit", "value": "short" }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #G" }, "properties": [ { "id": "displayName", "value": "Used bytes (%)" }, { "id": "unit", "value": "percentunit" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value #H" }, "properties": [ { "id": "displayName", "value": "Used inodes (%)" }, { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "controller" }, "properties": [ { "id": "displayName", "value": "Used by controller" }, { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] } ] }, "gridPos": { "h": 7, "w": 24, "x": 0, "y": 100 }, "id": 600, "links": [], "options": { "showHeader": true }, "pluginVersion": "8.2.6", "targets": [ { "expr": "max by (persistentvolumeclaim, storageclass) (\n max by (namespace, persistentvolumeclaim) (\n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__range])\n ) \n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"}\n )\n* on (namespace, persistentvolumeclaim) group_right() kube_persistentvolumeclaim_info{namespace=\"$namespace\"})", "format": "table", "hide": false, "instant": true, "intervalFactor": 1, "legendFormat": "Name, StorageClass", "refId": "A" }, { "expr": "max by (persistentvolumeclaim) (\n max by (namespace, persistentvolumeclaim) (\n max by (namespace, persistentvolumeclaim, pod) (\n 
max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__range])\n ) \n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"}\n )\n* on (namespace, persistentvolumeclaim) group_right() kube_persistentvolumeclaim_resource_requests_storage_bytes{namespace=\"$namespace\"})", "format": "table", "hide": false, "instant": true, "intervalFactor": 1, "legendFormat": "Requested (pvc)", "refId": "B" }, { "expr": "max by (persistentvolumeclaim) (\n max by (namespace, persistentvolumeclaim) (\n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__range])\n ) \n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"}\n )\n)\n* on (persistentvolumeclaim) group_right()\n(\nmax by (persistentvolumeclaim) (\n max by (persistentvolume) (max_over_time(kube_persistentvolume_capacity_bytes[$__range])) \n * on (persistentvolume) group_right() \n max by (persistentvolume, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__range]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n )\n)\n)", "format": "table", "hide": false, "instant": true, "intervalFactor": 1, "legendFormat": "Provisioned (pv)", "refId": "C" }, { "expr": "max by (persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n)", "format": "table", "hide": false, "instant": true, "intervalFactor": 1, "legendFormat": "Capacity (real)", "refId": "D" }, { "expr": "max by (persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range]) - max_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", 
namespace=\"$namespace\"}[$__range])\n)", "format": "table", "hide": false, "instant": true, "intervalFactor": 1, "legendFormat": "used bytes", "refId": "E" }, { "expr": "max by (persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_inodes_used{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n)", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "used inodes", "refId": "F" }, { "expr": "max by (persistentvolumeclaim) (\n (max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range]) - max_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range]))\n / \n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n)", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "used bytes %", "refId": "G" }, { "expr": "max by (persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_inodes_used{node=~\"$node\", namespace=\"$namespace\"}[$__range]) \n / \n max_over_time(kubelet_volume_stats_inodes{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n)", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "used inodes %", "refId": "H" }, { "expr": "sum by (persistentvolumeclaim, controller) (\n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__range])\n ) \n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"} \n)", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "used by controller", "refId": "I" } ], "title": "Overview", "transformations": [ { "id": "merge", "options": { "reducers": [] } }, { "id": "filterFieldsByName", "options": { "include": { "names": [ "persistentvolumeclaim", "storageclass", "Value #B", "Value #C", "Value #D", "Value #E", "Value 
#F", "Value #G", "Value #H", "controller" ] } } } ], "type": "table" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "", "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 24, "x": 0, "y": 107 }, "hiddenSeries": false, "id": 603, "legend": { "alignAsTable": true, "avg": true, "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.2.6", "pointradius": 5, "points": false, "renderer": "flot", "repeat": "persistentvolumeclaim", "repeatDirection": "h", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "\n\nmax by (persistentvolumeclaim) (\n max by (namespace, persistentvolumeclaim) (\n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__range])\n ) \n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"}\n )\n)\n* on (persistentvolumeclaim) group_right()\nmax by (persistentvolumeclaim) (\n max_over_time(kube_persistentvolume_capacity_bytes[$__rate_interval]) \n * on (persistentvolume) group_right() \n (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\", persistentvolumeclaim=~\"$persistentvolumeclaim\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n )\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Provisioned", "refId": "A" }, { "expr": "max by (persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\", 
persistentvolumeclaim=~\"$persistentvolumeclaim\"}[$__rate_interval])\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Capacity", "refId": "B" }, { "expr": "max by (persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\", persistentvolumeclaim=~\"$persistentvolumeclaim\"}[$__rate_interval]) - max_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\", persistentvolumeclaim=~\"$persistentvolumeclaim\"}[$__rate_interval])\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Used", "refId": "C" } ], "thresholds": [], "timeRegions": [], "title": "$persistentvolumeclaim", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "decimals": 2, "format": "bytes", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": false } ], "yaxis": { "align": false } } ], "title": "PVC Detailed", "type": "row" } ], "refresh": "", "schemaVersion": 36, "style": "dark", "tags": [ "main" ], "templating": { "list": [ { "current": { "selected": false, "text": "default", "value": "default" }, "hide": 0, "includeAll": false, "label": "Prometheus", "multi": false, "name": "ds_prometheus", "options": [], "query": "prometheus", "refresh": 1, "regex": "", "skipUrlSync": false, "type": "datasource" }, { "allValue": ".*", "current": { "selected": true, "text": [ "All" ], "value": [ "$__all" ] }, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "definition": "label_values(kubernetes_build_info, node)", "hide": 0, "includeAll": true, "label": "Node", "multi": true, "name": "node", "options": [], "query": { "query": "label_values(kubernetes_build_info, node)", "refId": "main-node-Variable-Query" }, "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tagsQuery": "", "type": "query", "useTags": false }, 
{ "current": { "selected": false, "text": "d8-monitoring", "value": "d8-monitoring" }, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "definition": "label_values(kube_pod_info{node=~\"$node\"}, namespace)", "hide": 0, "includeAll": false, "label": "Namespace", "multi": false, "name": "namespace", "options": [], "query": { "query": "label_values(kube_pod_info{node=~\"$node\"}, namespace)", "refId": "main-namespace-Variable-Query" }, "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".*", "current": { "selected": true, "text": [ "All" ], "value": [ ".*" ] }, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "definition": "label_values(kube_controller_pod{node=~\"$node\", namespace=~\"$namespace\"}, controller_type)", "hide": 0, "includeAll": true, "label": "Controller Type", "multi": true, "name": "controller_type", "options": [], "query": { "query": "label_values(kube_controller_pod{node=~\"$node\", namespace=~\"$namespace\"}, controller_type)", "refId": "main-controller_type-Variable-Query" }, "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".*", "current": { "selected": false, "text": "All", "value": "$__all" }, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "definition": "label_values(kube_controller_pod{node=~\"$node\", namespace=~\"$namespace\", controller_type=~\"$controller_type\"}, controller)", "hide": 0, "includeAll": true, "label": "Controller", "multi": true, "name": "controller", "options": [], "query": { "query": "label_values(kube_controller_pod{node=~\"$node\", namespace=~\"$namespace\", controller_type=~\"$controller_type\"}, controller)", "refId": "main-controller-Variable-Query" }, "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tagsQuery": "", "type": "query", 
"useTags": false }, { "allValue": ".*", "current": { "selected": true, "text": [ "All" ], "value": [ "$__all" ] }, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "definition": "label_values(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}, pod)", "hide": 2, "includeAll": true, "label": "Pod", "multi": true, "name": "pod", "options": [], "query": { "query": "label_values(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}, pod)", "refId": "main-pod-Variable-Query" }, "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".*", "current": { "selected": true, "text": [ "All" ], "value": [ "$__all" ] }, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "definition": "query_result(max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range]))", "hide": 2, "includeAll": true, "label": "PersistentVolumeClaim", "multi": true, "name": "persistentvolumeclaim", "options": [], "query": { "query": "query_result(max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range]))", "refId": "main-persistentvolumeclaim-Variable-Query" }, "refresh": 2, "regex": "/.*persistentvolumeclaim=\"([^\"]+)\".*/", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-3h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "", "title": "Namespace", "uid": "sZzUB4ymk1", "version": 1, "weekStart": "" }