mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-10-31 02:08:13 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			385 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			385 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright 2020 Google Inc. All Rights Reserved.
 | |
| //
 | |
| // Licensed under the Apache License, Version 2.0 (the "License");
 | |
| // you may not use this file except in compliance with the License.
 | |
| // You may obtain a copy of the License at
 | |
| //
 | |
| //     http://www.apache.org/licenses/LICENSE-2.0
 | |
| //
 | |
| // Unless required by applicable law or agreed to in writing, software
 | |
| // distributed under the License is distributed on an "AS IS" BASIS,
 | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // See the License for the specific language governing permissions and
 | |
| // limitations under the License.
 | |
| 
 | |
| package metrics
 | |
| 
 | |
| import (
 | |
| 	"strconv"
 | |
| 
 | |
| 	"github.com/prometheus/client_golang/prometheus"
 | |
| 
 | |
| 	"github.com/google/cadvisor/container"
 | |
| 	info "github.com/google/cadvisor/info/v1"
 | |
| 
 | |
| 	"k8s.io/klog/v2"
 | |
| )
 | |
| 
 | |
// baseLabelsNames lists the names of the labels attached to every machine
// metric. collectMachineInfo supplies the matching values in the same order:
// machine ID, system UUID and boot ID.
var baseLabelsNames = []string{"machine_id", "system_uuid", "boot_id"}
 | |
| 
 | |
const (
	// Names of the metric-specific labels that individual machine metrics add
	// on top of the base labels.
	prometheusModeLabelName       = "mode"
	prometheusTypeLabelName       = "type"
	prometheusLevelLabelName      = "level"
	prometheusNodeLabelName       = "node_id"
	prometheusCoreLabelName       = "core_id"
	prometheusThreadLabelName     = "thread_id"
	prometheusPageSizeLabelName   = "page_size"
	prometheusTargetNodeLabelName = "target_node_id"

	// Values of the "mode" label used by the machine_nvm_capacity metric.
	nvmMemoryMode    = "memory_mode"
	nvmAppDirectMode = "app_direct_mode"

	// Property selectors accepted by getMemoryByType.
	memoryByTypeDimmCountKey    = "DimmCount"
	memoryByTypeDimmCapacityKey = "Capacity"

	// emptyLabelValue fills a label position that does not apply to a value,
	// e.g. the core_id of a cache that belongs to a whole node.
	emptyLabelValue = ""
)
 | |
| 
 | |
// machineMetric describes a multi-dimensional metric used for exposing a
// certain type of machine statistic.
type machineMetric struct {
	// name is the metric name passed to prometheus.NewDesc.
	name string
	// help is the help text passed to prometheus.NewDesc.
	help string
	// valueType is the Prometheus value type used when the metric is emitted.
	valueType prometheus.ValueType
	// extraLabels holds the metric-specific label names appended after the
	// base labels; when empty, per-value labels are not emitted.
	extraLabels []string
	// condition, when non-nil, must return true for the metric to be
	// collected for the given machine info.
	condition func(machineInfo *info.MachineInfo) bool
	// getValues extracts the values (with their extra label values and
	// timestamps) of this metric from the machine info.
	getValues func(machineInfo *info.MachineInfo) metricValues
}
 | |
| 
 | |
| func (metric *machineMetric) desc(baseLabels []string) *prometheus.Desc {
 | |
| 	return prometheus.NewDesc(metric.name, metric.help, append(baseLabels, metric.extraLabels...), nil)
 | |
| }
 | |
| 
 | |
// PrometheusMachineCollector implements prometheus.Collector.
type PrometheusMachineCollector struct {
	// infoProvider is queried for the machine info on every collection.
	infoProvider infoProvider
	// errors is set to 1 when the machine info could not be obtained during
	// the last collection and to 0 otherwise.
	errors prometheus.Gauge
	// machineMetrics is the set of metrics derived from the machine info.
	machineMetrics []machineMetric
}
 | |
| 
 | |
| // NewPrometheusMachineCollector returns a new PrometheusCollector.
 | |
| func NewPrometheusMachineCollector(i infoProvider, includedMetrics container.MetricSet) *PrometheusMachineCollector {
 | |
| 	c := &PrometheusMachineCollector{
 | |
| 
 | |
| 		infoProvider: i,
 | |
| 		errors: prometheus.NewGauge(prometheus.GaugeOpts{
 | |
| 			Namespace: "machine",
 | |
| 			Name:      "scrape_error",
 | |
| 			Help:      "1 if there was an error while getting machine metrics, 0 otherwise.",
 | |
| 		}),
 | |
| 		machineMetrics: []machineMetric{
 | |
| 			{
 | |
| 				name:      "machine_cpu_physical_cores",
 | |
| 				help:      "Number of physical CPU cores.",
 | |
| 				valueType: prometheus.GaugeValue,
 | |
| 				getValues: func(machineInfo *info.MachineInfo) metricValues {
 | |
| 					return metricValues{{value: float64(machineInfo.NumPhysicalCores), timestamp: machineInfo.Timestamp}}
 | |
| 				},
 | |
| 			},
 | |
| 			{
 | |
| 				name:      "machine_cpu_cores",
 | |
| 				help:      "Number of logical CPU cores.",
 | |
| 				valueType: prometheus.GaugeValue,
 | |
| 				getValues: func(machineInfo *info.MachineInfo) metricValues {
 | |
| 					return metricValues{{value: float64(machineInfo.NumCores), timestamp: machineInfo.Timestamp}}
 | |
| 				},
 | |
| 			},
 | |
| 			{
 | |
| 				name:      "machine_cpu_sockets",
 | |
| 				help:      "Number of CPU sockets.",
 | |
| 				valueType: prometheus.GaugeValue,
 | |
| 				getValues: func(machineInfo *info.MachineInfo) metricValues {
 | |
| 					return metricValues{{value: float64(machineInfo.NumSockets), timestamp: machineInfo.Timestamp}}
 | |
| 				},
 | |
| 			},
 | |
| 			{
 | |
| 				name:      "machine_memory_bytes",
 | |
| 				help:      "Amount of memory installed on the machine.",
 | |
| 				valueType: prometheus.GaugeValue,
 | |
| 				getValues: func(machineInfo *info.MachineInfo) metricValues {
 | |
| 					return metricValues{{value: float64(machineInfo.MemoryCapacity), timestamp: machineInfo.Timestamp}}
 | |
| 				},
 | |
| 			},
 | |
| 			{
 | |
| 				name:        "machine_dimm_count",
 | |
| 				help:        "Number of RAM DIMM (all types memory modules) value labeled by dimm type.",
 | |
| 				valueType:   prometheus.GaugeValue,
 | |
| 				extraLabels: []string{prometheusTypeLabelName},
 | |
| 				condition:   func(machineInfo *info.MachineInfo) bool { return len(machineInfo.MemoryByType) != 0 },
 | |
| 				getValues: func(machineInfo *info.MachineInfo) metricValues {
 | |
| 					return getMemoryByType(machineInfo, memoryByTypeDimmCountKey)
 | |
| 				},
 | |
| 			},
 | |
| 			{
 | |
| 				name:        "machine_dimm_capacity_bytes",
 | |
| 				help:        "Total RAM DIMM capacity (all types memory modules) value labeled by dimm type.",
 | |
| 				valueType:   prometheus.GaugeValue,
 | |
| 				extraLabels: []string{prometheusTypeLabelName},
 | |
| 				condition:   func(machineInfo *info.MachineInfo) bool { return len(machineInfo.MemoryByType) != 0 },
 | |
| 				getValues: func(machineInfo *info.MachineInfo) metricValues {
 | |
| 					return getMemoryByType(machineInfo, memoryByTypeDimmCapacityKey)
 | |
| 				},
 | |
| 			},
 | |
| 			{
 | |
| 				name:        "machine_nvm_capacity",
 | |
| 				help:        "NVM capacity value labeled by NVM mode (memory mode or app direct mode).",
 | |
| 				valueType:   prometheus.GaugeValue,
 | |
| 				extraLabels: []string{prometheusModeLabelName},
 | |
| 				getValues: func(machineInfo *info.MachineInfo) metricValues {
 | |
| 					return metricValues{
 | |
| 						{value: float64(machineInfo.NVMInfo.MemoryModeCapacity), labels: []string{nvmMemoryMode}, timestamp: machineInfo.Timestamp},
 | |
| 						{value: float64(machineInfo.NVMInfo.AppDirectModeCapacity), labels: []string{nvmAppDirectMode}, timestamp: machineInfo.Timestamp},
 | |
| 					}
 | |
| 				},
 | |
| 			},
 | |
| 			{
 | |
| 				name:      "machine_nvm_avg_power_budget_watts",
 | |
| 				help:      "NVM power budget.",
 | |
| 				valueType: prometheus.GaugeValue,
 | |
| 				getValues: func(machineInfo *info.MachineInfo) metricValues {
 | |
| 					return metricValues{{value: float64(machineInfo.NVMInfo.AvgPowerBudget), timestamp: machineInfo.Timestamp}}
 | |
| 				},
 | |
| 			},
 | |
| 		},
 | |
| 	}
 | |
| 
 | |
| 	if includedMetrics.Has(container.CPUTopologyMetrics) {
 | |
| 		c.machineMetrics = append(c.machineMetrics, []machineMetric{
 | |
| 			{
 | |
| 				name:        "machine_cpu_cache_capacity_bytes",
 | |
| 				help:        "Cache size in bytes assigned to NUMA node and CPU core.",
 | |
| 				valueType:   prometheus.GaugeValue,
 | |
| 				extraLabels: []string{prometheusNodeLabelName, prometheusCoreLabelName, prometheusTypeLabelName, prometheusLevelLabelName},
 | |
| 				getValues: func(machineInfo *info.MachineInfo) metricValues {
 | |
| 					return getCaches(machineInfo)
 | |
| 				},
 | |
| 			},
 | |
| 			{
 | |
| 				name:        "machine_thread_siblings_count",
 | |
| 				help:        "Number of CPU thread siblings.",
 | |
| 				valueType:   prometheus.GaugeValue,
 | |
| 				extraLabels: []string{prometheusNodeLabelName, prometheusCoreLabelName, prometheusThreadLabelName},
 | |
| 				getValues: func(machineInfo *info.MachineInfo) metricValues {
 | |
| 					return getThreadsSiblingsCount(machineInfo)
 | |
| 				},
 | |
| 			},
 | |
| 			{
 | |
| 				name:        "machine_node_memory_capacity_bytes",
 | |
| 				help:        "Amount of memory assigned to NUMA node.",
 | |
| 				valueType:   prometheus.GaugeValue,
 | |
| 				extraLabels: []string{prometheusNodeLabelName},
 | |
| 				getValues: func(machineInfo *info.MachineInfo) metricValues {
 | |
| 					return getNodeMemory(machineInfo)
 | |
| 				},
 | |
| 			},
 | |
| 			{
 | |
| 				name:        "machine_node_hugepages_count",
 | |
| 				help:        "Numer of hugepages assigned to NUMA node.",
 | |
| 				valueType:   prometheus.GaugeValue,
 | |
| 				extraLabels: []string{prometheusNodeLabelName, prometheusPageSizeLabelName},
 | |
| 				getValues: func(machineInfo *info.MachineInfo) metricValues {
 | |
| 					return getHugePagesCount(machineInfo)
 | |
| 				},
 | |
| 			},
 | |
| 			{
 | |
| 				name:        "machine_node_distance",
 | |
| 				help:        "Distance between NUMA node and target NUMA node.",
 | |
| 				valueType:   prometheus.GaugeValue,
 | |
| 				extraLabels: []string{prometheusNodeLabelName, prometheusTargetNodeLabelName},
 | |
| 				getValues: func(machineInfo *info.MachineInfo) metricValues {
 | |
| 					return getDistance(machineInfo)
 | |
| 				},
 | |
| 			},
 | |
| 		}...)
 | |
| 	}
 | |
| 	return c
 | |
| }
 | |
| 
 | |
| // Describe describes all the machine metrics ever exported by cadvisor. It
 | |
| // implements prometheus.PrometheusCollector.
 | |
| func (collector *PrometheusMachineCollector) Describe(ch chan<- *prometheus.Desc) {
 | |
| 	collector.errors.Describe(ch)
 | |
| 	for _, metric := range collector.machineMetrics {
 | |
| 		ch <- metric.desc([]string{})
 | |
| 	}
 | |
| }
 | |
| 
 | |
// Collect fetches information about the machine and delivers it as
// Prometheus metrics. It implements prometheus.Collector.
//
// The scrape-error gauge is reset to 0 first, set to 1 by collectMachineInfo
// if the machine info cannot be obtained, and emitted last so that it
// reflects the outcome of this collection.
func (collector *PrometheusMachineCollector) Collect(ch chan<- prometheus.Metric) {
	collector.errors.Set(0)
	collector.collectMachineInfo(ch)
	collector.errors.Collect(ch)
}
 | |
| 
 | |
| func (collector *PrometheusMachineCollector) collectMachineInfo(ch chan<- prometheus.Metric) {
 | |
| 	machineInfo, err := collector.infoProvider.GetMachineInfo()
 | |
| 	if err != nil {
 | |
| 		collector.errors.Set(1)
 | |
| 		klog.Warningf("Couldn't get machine info: %s", err)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	baseLabelsValues := []string{machineInfo.MachineID, machineInfo.SystemUUID, machineInfo.BootID}
 | |
| 
 | |
| 	for _, metric := range collector.machineMetrics {
 | |
| 		if metric.condition != nil && !metric.condition(machineInfo) {
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		for _, metricValue := range metric.getValues(machineInfo) {
 | |
| 			labelValues := make([]string, len(baseLabelsValues))
 | |
| 			copy(labelValues, baseLabelsValues)
 | |
| 			if len(metric.extraLabels) != 0 {
 | |
| 				labelValues = append(labelValues, metricValue.labels...)
 | |
| 			}
 | |
| 
 | |
| 			prometheusMetric := prometheus.MustNewConstMetric(metric.desc(baseLabelsNames),
 | |
| 				metric.valueType, metricValue.value, labelValues...)
 | |
| 
 | |
| 			if metricValue.timestamp.IsZero() {
 | |
| 				ch <- prometheusMetric
 | |
| 			} else {
 | |
| 				ch <- prometheus.NewMetricWithTimestamp(metricValue.timestamp, prometheusMetric)
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func getMemoryByType(machineInfo *info.MachineInfo, property string) metricValues {
 | |
| 	mValues := make(metricValues, 0, len(machineInfo.MemoryByType))
 | |
| 	for memoryType, memoryInfo := range machineInfo.MemoryByType {
 | |
| 		propertyValue := 0.0
 | |
| 		switch property {
 | |
| 		case memoryByTypeDimmCapacityKey:
 | |
| 			propertyValue = float64(memoryInfo.Capacity)
 | |
| 		case memoryByTypeDimmCountKey:
 | |
| 			propertyValue = float64(memoryInfo.DimmCount)
 | |
| 		default:
 | |
| 			klog.Warningf("Incorrect propery name for MemoryByType, property %s", property)
 | |
| 			return metricValues{}
 | |
| 		}
 | |
| 		mValues = append(mValues, metricValue{value: propertyValue, labels: []string{memoryType}, timestamp: machineInfo.Timestamp})
 | |
| 	}
 | |
| 	return mValues
 | |
| }
 | |
| 
 | |
| func getThreadsSiblingsCount(machineInfo *info.MachineInfo) metricValues {
 | |
| 	mValues := make(metricValues, 0, machineInfo.NumCores)
 | |
| 	for _, node := range machineInfo.Topology {
 | |
| 		nodeID := strconv.Itoa(node.Id)
 | |
| 
 | |
| 		for _, core := range node.Cores {
 | |
| 			coreID := strconv.Itoa(core.Id)
 | |
| 			siblingsCount := len(core.Threads)
 | |
| 
 | |
| 			for _, thread := range core.Threads {
 | |
| 				mValues = append(mValues,
 | |
| 					metricValue{
 | |
| 						value:     float64(siblingsCount),
 | |
| 						labels:    []string{nodeID, coreID, strconv.Itoa(thread)},
 | |
| 						timestamp: machineInfo.Timestamp,
 | |
| 					})
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	return mValues
 | |
| }
 | |
| 
 | |
| func getNodeMemory(machineInfo *info.MachineInfo) metricValues {
 | |
| 	mValues := make(metricValues, 0, len(machineInfo.Topology))
 | |
| 	for _, node := range machineInfo.Topology {
 | |
| 		nodeID := strconv.Itoa(node.Id)
 | |
| 		mValues = append(mValues,
 | |
| 			metricValue{
 | |
| 				value:     float64(node.Memory),
 | |
| 				labels:    []string{nodeID},
 | |
| 				timestamp: machineInfo.Timestamp,
 | |
| 			})
 | |
| 	}
 | |
| 	return mValues
 | |
| }
 | |
| 
 | |
| func getHugePagesCount(machineInfo *info.MachineInfo) metricValues {
 | |
| 	mValues := make(metricValues, 0)
 | |
| 	for _, node := range machineInfo.Topology {
 | |
| 		nodeID := strconv.Itoa(node.Id)
 | |
| 
 | |
| 		for _, hugePage := range node.HugePages {
 | |
| 			mValues = append(mValues,
 | |
| 				metricValue{
 | |
| 					value:     float64(hugePage.NumPages),
 | |
| 					labels:    []string{nodeID, strconv.FormatUint(hugePage.PageSize, 10)},
 | |
| 					timestamp: machineInfo.Timestamp,
 | |
| 				})
 | |
| 		}
 | |
| 	}
 | |
| 	return mValues
 | |
| }
 | |
| 
 | |
| func getCaches(machineInfo *info.MachineInfo) metricValues {
 | |
| 	mValues := make(metricValues, 0)
 | |
| 	for _, node := range machineInfo.Topology {
 | |
| 		nodeID := strconv.Itoa(node.Id)
 | |
| 
 | |
| 		for _, core := range node.Cores {
 | |
| 			coreID := strconv.Itoa(core.Id)
 | |
| 
 | |
| 			for _, cache := range core.Caches {
 | |
| 				mValues = append(mValues,
 | |
| 					metricValue{
 | |
| 						value:     float64(cache.Size),
 | |
| 						labels:    []string{nodeID, coreID, cache.Type, strconv.Itoa(cache.Level)},
 | |
| 						timestamp: machineInfo.Timestamp,
 | |
| 					})
 | |
| 			}
 | |
| 			for _, cache := range core.UncoreCaches {
 | |
| 				mValues = append(mValues,
 | |
| 					metricValue{
 | |
| 						value:     float64(cache.Size),
 | |
| 						labels:    []string{nodeID, coreID, cache.Type, strconv.Itoa(cache.Level)},
 | |
| 						timestamp: machineInfo.Timestamp,
 | |
| 					})
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		for _, cache := range node.Caches {
 | |
| 			mValues = append(mValues,
 | |
| 				metricValue{
 | |
| 					value:     float64(cache.Size),
 | |
| 					labels:    []string{nodeID, emptyLabelValue, cache.Type, strconv.Itoa(cache.Level)},
 | |
| 					timestamp: machineInfo.Timestamp,
 | |
| 				})
 | |
| 		}
 | |
| 	}
 | |
| 	return mValues
 | |
| }
 | |
| 
 | |
| func getDistance(machineInfo *info.MachineInfo) metricValues {
 | |
| 	mValues := make(metricValues, 0, len(machineInfo.Topology)^2)
 | |
| 	for _, node := range machineInfo.Topology {
 | |
| 		nodeID := strconv.Itoa(node.Id)
 | |
| 		for i, target := range node.Distances {
 | |
| 			mValues = append(mValues,
 | |
| 				metricValue{
 | |
| 					value:     float64(target),
 | |
| 					labels:    []string{nodeID, strconv.Itoa(i)},
 | |
| 					timestamp: machineInfo.Timestamp,
 | |
| 				})
 | |
| 		}
 | |
| 	}
 | |
| 	return mValues
 | |
| }
 | 
