diff --git a/grafana/qa-debugging.json b/grafana/qa-debugging.json new file mode 100644 index 000000000..974fa1dfd --- /dev/null +++ b/grafana/qa-debugging.json @@ -0,0 +1,798 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "enable": true, + "expr": "increase(kube_pod_container_status_restarts_total{namespace=\"$namespace\",pod=\"$pod\"}[10m])", + "iconColor": "dark-purple", + "name": "Restarts", + "titleFormat": "Restart" + } + ] + }, + "description": "", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 2, + "iteration": 1667477711742, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prom}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".* requests" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 15 + ], + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed", + "seriesBy": "last" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".* limits" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".* throttling" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "color", + "value": { + "fixedColor": "dark-blue", + "mode": "fixed" + } + }, + { + "id": "max", + "value": 1 + }, + { + "id": "custom.lineWidth", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".* usage" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 100 + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=\"$pod\", container=\"$container\"}) by (container)", + "legendFormat": "{{container}} usage", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\", container=\"$container\"}) by (container)", + "hide": false, + "legendFormat": "{{container}} requests", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=\"$container\", resource=\"cpu\"}) by (container)", + "hide": false, + "legendFormat": "{{container}} limits", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(increase(container_cpu_cfs_throttled_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container=\"$container\", cluster=\"$cluster\"}[$__rate_interval])) by (container) /sum(increase(container_cpu_cfs_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container=\"$container\", cluster=\"$cluster\"}[$__rate_interval])) by (container)\n", + "hide": false, + "legendFormat": "{{container}} throttling", + "range": true, + "refId": "D" + } + ], + "title": "CPU Usage (in shares of cores)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prom}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".* requests" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 15 + ], + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed", + "seriesBy": "last" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".* limits" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".* usage" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 100 + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=\"$container\", image!=\"\"}) by (container)", + "legendFormat": "{{container}} usage", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\", container=\"$container\"}) by (container)", + "hide": false, + "legendFormat": "{{container}} requests", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=\"$container\", resource=\"memory\"}) by (container)", + "hide": false, + "legendFormat": "{{container}} limits", + "range": true, + "refId": "C" + } + ], + "title": "Memory usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prom}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 0, + "y": 9 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.5.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "sum(kube_pod_container_status_restarts_total{namespace=\"$namespace\",pod=\"$pod\",container=\"$container\"})", + "refId": "A" + } + ], + "title": "Total restarts amount", + "type": "stat" + }, + { + "datasource": { + "type": "elasticsearch", + "uid": "${es}" + }, + "gridPos": { + "h": 13, + "w": 21, + "x": 3, + "y": 9 + }, + "id": 7, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { + "alias": "{{message}}", + "bucketAggs": [ + { + "field": "@timestamp", + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "elasticsearch", + "uid": "EE7WRd67z" + }, + "metrics": [ + { + "id": "1", + "type": "logs" + } + ], + "query": "kubernetes.namespace_name.keyword: \"$namespace\" AND kubernetes.pod_name.keyword: \"$pod\" AND kubernetes.container_name.keyword: \"$container\"", + "refId": "A", + "timeField": "@timestamp" + } + ], + "title": "Logs", + "type": "logs" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prom}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 0, + "y": 15 + }, + "id": 9, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.5.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "sum(kube_pod_container_status_last_terminated_exitcode{namespace=\"$namespace\",pod=\"$pod\",container=\"$container\"})", + "refId": "A" + } + ], + "title": "Last restart exit code", + "type": "stat" + } + ], + "refresh": "10s", + "schemaVersion": 36, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "Prometheus source", + "multi": false, + "name": "prom", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "Elasticsearch", + "value": "Elasticsearch" + }, + "hide": 0, + "includeAll": false, + "label": "ES source", + "multi": false, + "name": "es", + "options": [], + "query": "elasticsearch", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "openwifi-qa01", + "value": "openwifi-qa01" + }, + "datasource": { + "type": "prometheus", + "uid": "${prom}" + }, + "definition": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "type": "prometheus", + "uid": "${prom}" + }, + "definition": "label_values(up{job=\"kube-state-metrics\"}, cluster)", + "hide": 2, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [], + "query": { + "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "kafka-0", + "value": "kafka-0" + }, + "datasource": { + "type": "prometheus", + "uid": "${prom}" + }, + "definition": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", + "hide": 0, + "includeAll": false, + "label": "Pod", + "multi": false, + "name": "pod", + "options": [], + "query": { + "query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "kafka", + "value": "kafka" + }, + "datasource": { + "type": "prometheus", + "uid": "${prom}" + }, + "definition": "label_values(container_memory_usage_bytes{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\"}, container)", + "hide": 0, + "includeAll": false, + "label": "Container", + "multi": false, + "name": "container", + "options": [], + "query": { + "query": "label_values(container_memory_usage_bytes{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\"}, container)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "QA debugging", + "uid": "1Ge5lwNVz", + "version": 36, + "weekStart": "" +} \ No newline at end of file