Change kubelet metrics to conform to the guidelines

This commit is contained in:
danielqsj
2019-01-02 10:45:41 +08:00
parent 2de487c91f
commit 9fd99a48f5
9 changed files with 181 additions and 45 deletions

View File

@@ -31,30 +31,42 @@ import (
)
const (
KubeletSubsystem = "kubelet"
NodeNameKey = "node_name"
NodeLabelKey = "node"
PodWorkerLatencyKey = "pod_worker_latency_microseconds"
PodStartLatencyKey = "pod_start_latency_microseconds"
CgroupManagerOperationsKey = "cgroup_manager_latency_microseconds"
PodWorkerStartLatencyKey = "pod_worker_start_latency_microseconds"
PLEGRelistLatencyKey = "pleg_relist_latency_microseconds"
PLEGDiscardEventsKey = "pleg_discard_events"
PLEGRelistIntervalKey = "pleg_relist_interval_microseconds"
EvictionStatsAgeKey = "eviction_stats_age_microseconds"
VolumeStatsCapacityBytesKey = "volume_stats_capacity_bytes"
VolumeStatsAvailableBytesKey = "volume_stats_available_bytes"
VolumeStatsUsedBytesKey = "volume_stats_used_bytes"
VolumeStatsInodesKey = "volume_stats_inodes"
VolumeStatsInodesFreeKey = "volume_stats_inodes_free"
VolumeStatsInodesUsedKey = "volume_stats_inodes_used"
KubeletSubsystem = "kubelet"
NodeNameKey = "node_name"
NodeLabelKey = "node"
PodWorkerLatencyKey = "pod_worker_latency_seconds"
PodStartLatencyKey = "pod_start_latency_seconds"
CgroupManagerOperationsKey = "cgroup_manager_latency_seconds"
PodWorkerStartLatencyKey = "pod_worker_start_latency_seconds"
PLEGRelistLatencyKey = "pleg_relist_latency_seconds"
PLEGDiscardEventsKey = "pleg_discard_events"
PLEGRelistIntervalKey = "pleg_relist_interval_seconds"
EvictionStatsAgeKey = "eviction_stats_age_seconds"
DeprecatedPodWorkerLatencyKey = "pod_worker_latency_microseconds"
DeprecatedPodStartLatencyKey = "pod_start_latency_microseconds"
DeprecatedCgroupManagerOperationsKey = "cgroup_manager_latency_microseconds"
DeprecatedPodWorkerStartLatencyKey = "pod_worker_start_latency_microseconds"
DeprecatedPLEGRelistLatencyKey = "pleg_relist_latency_microseconds"
DeprecatedPLEGRelistIntervalKey = "pleg_relist_interval_microseconds"
DeprecatedEvictionStatsAgeKey = "eviction_stats_age_microseconds"
VolumeStatsCapacityBytesKey = "volume_stats_capacity_bytes"
VolumeStatsAvailableBytesKey = "volume_stats_available_bytes"
VolumeStatsUsedBytesKey = "volume_stats_used_bytes"
VolumeStatsInodesKey = "volume_stats_inodes"
VolumeStatsInodesFreeKey = "volume_stats_inodes_free"
VolumeStatsInodesUsedKey = "volume_stats_inodes_used"
// Metrics keys of remote runtime operations
RuntimeOperationsKey = "runtime_operations"
RuntimeOperationsLatencyKey = "runtime_operations_latency_microseconds"
RuntimeOperationsErrorsKey = "runtime_operations_errors"
RuntimeOperationsKey = "runtime_operations_total"
RuntimeOperationsLatencyKey = "runtime_operations_latency_seconds"
RuntimeOperationsErrorsKey = "runtime_operations_errors_total"
DeprecatedRuntimeOperationsKey = "runtime_operations"
DeprecatedRuntimeOperationsLatencyKey = "runtime_operations_latency_microseconds"
DeprecatedRuntimeOperationsErrorsKey = "runtime_operations_errors"
// Metrics keys of device plugin operations
DevicePluginRegistrationCountKey = "device_plugin_registration_count"
DevicePluginAllocationLatencyKey = "device_plugin_alloc_latency_microseconds"
DevicePluginRegistrationCountKey = "device_plugin_registration_total"
DevicePluginAllocationLatencyKey = "device_plugin_alloc_latency_seconds"
DeprecatedDevicePluginRegistrationCountKey = "device_plugin_registration_count"
DeprecatedDevicePluginAllocationLatencyKey = "device_plugin_alloc_latency_microseconds"
// Metric keys for node config
AssignedConfigKey = "node_config_assigned"
@@ -92,7 +104,7 @@ var (
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: PodWorkerLatencyKey,
Help: "Latency in microseconds to sync a single pod. Broken down by operation type: create, update, or sync",
Help: "Latency in seconds to sync a single pod. Broken down by operation type: create, update, or sync",
},
[]string{"operation_type"},
)
@@ -100,14 +112,14 @@ var (
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: PodStartLatencyKey,
Help: "Latency in microseconds for a single pod to go from pending to running.",
Help: "Latency in seconds for a single pod to go from pending to running.",
},
)
CgroupManagerLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: CgroupManagerOperationsKey,
Help: "Latency in microseconds for cgroup manager operations. Broken down by method.",
Help: "Latency in seconds for cgroup manager operations. Broken down by method.",
},
[]string{"operation_type"},
)
@@ -115,14 +127,14 @@ var (
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: PodWorkerStartLatencyKey,
Help: "Latency in microseconds from seeing a pod to starting a worker.",
Help: "Latency in seconds from seeing a pod to starting a worker.",
},
)
PLEGRelistLatency = prometheus.NewSummary(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: PLEGRelistLatencyKey,
Help: "Latency in microseconds for relisting pods in PLEG.",
Help: "Latency in seconds for relisting pods in PLEG.",
},
)
PLEGDiscardEvents = prometheus.NewCounterVec(
@@ -137,7 +149,7 @@ var (
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: PLEGRelistIntervalKey,
Help: "Interval in microseconds between relisting in PLEG.",
Help: "Interval in seconds between relisting in PLEG.",
},
)
// Metrics of remote runtime operations.
@@ -153,7 +165,7 @@ var (
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: RuntimeOperationsLatencyKey,
Help: "Latency in microseconds of runtime operations. Broken down by operation type.",
Help: "Latency in seconds of runtime operations. Broken down by operation type.",
},
[]string{"operation_type"},
)
@@ -185,6 +197,99 @@ var (
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DevicePluginAllocationLatencyKey,
Help: "Latency in seconds to serve a device plugin Allocation request. Broken down by resource name.",
},
[]string{"resource_name"},
)
// DeprecatedPodWorkerLatency is a summary vector in the kubelet subsystem,
// exported under the legacy name DeprecatedPodWorkerLatencyKey
// ("pod_worker_latency_microseconds") and labeled by "operation_type".
// Per its help text the observed values are microseconds.
// NOTE(review): presumably superseded by the metric named PodWorkerLatencyKey
// (seconds) — confirm before removing.
DeprecatedPodWorkerLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedPodWorkerLatencyKey,
Help: "Latency in microseconds to sync a single pod. Broken down by operation type: create, update, or sync",
},
[]string{"operation_type"},
)
// DeprecatedPodStartLatency is an unlabeled summary in the kubelet subsystem,
// exported under the legacy name DeprecatedPodStartLatencyKey
// ("pod_start_latency_microseconds"). Per its help text the observed values
// are microseconds.
// NOTE(review): presumably superseded by the metric named PodStartLatencyKey
// (seconds) — confirm before removing.
DeprecatedPodStartLatency = prometheus.NewSummary(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedPodStartLatencyKey,
Help: "Latency in microseconds for a single pod to go from pending to running.",
},
)
// DeprecatedCgroupManagerLatency is a summary vector in the kubelet subsystem,
// exported under the legacy name DeprecatedCgroupManagerOperationsKey
// ("cgroup_manager_latency_microseconds") and labeled by "operation_type".
// Per its help text the observed values are microseconds.
// NOTE(review): presumably superseded by the metric named
// CgroupManagerOperationsKey (seconds) — confirm before removing.
DeprecatedCgroupManagerLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedCgroupManagerOperationsKey,
Help: "Latency in microseconds for cgroup manager operations. Broken down by method.",
},
[]string{"operation_type"},
)
// DeprecatedPodWorkerStartLatency is an unlabeled summary in the kubelet
// subsystem, exported under the legacy name DeprecatedPodWorkerStartLatencyKey
// ("pod_worker_start_latency_microseconds"). Per its help text the observed
// values are microseconds.
// NOTE(review): presumably superseded by the metric named
// PodWorkerStartLatencyKey (seconds) — confirm before removing.
DeprecatedPodWorkerStartLatency = prometheus.NewSummary(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedPodWorkerStartLatencyKey,
Help: "Latency in microseconds from seeing a pod to starting a worker.",
},
)
// DeprecatedPLEGRelistLatency is an unlabeled summary in the kubelet
// subsystem, exported under the legacy name DeprecatedPLEGRelistLatencyKey
// ("pleg_relist_latency_microseconds"). Per its help text the observed values
// are microseconds.
// NOTE(review): presumably superseded by the metric named PLEGRelistLatencyKey
// (seconds) — confirm before removing.
DeprecatedPLEGRelistLatency = prometheus.NewSummary(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedPLEGRelistLatencyKey,
Help: "Latency in microseconds for relisting pods in PLEG.",
},
)
// DeprecatedPLEGRelistInterval is an unlabeled summary in the kubelet
// subsystem, exported under the legacy name DeprecatedPLEGRelistIntervalKey
// ("pleg_relist_interval_microseconds"). Per its help text the observed values
// are microseconds.
// NOTE(review): presumably superseded by the metric named
// PLEGRelistIntervalKey (seconds) — confirm before removing.
DeprecatedPLEGRelistInterval = prometheus.NewSummary(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedPLEGRelistIntervalKey,
Help: "Interval in microseconds between relisting in PLEG.",
},
)
// DeprecatedRuntimeOperations is a counter vector in the kubelet subsystem,
// exported under the legacy name DeprecatedRuntimeOperationsKey
// ("runtime_operations", without the "_total" suffix) and labeled by
// "operation_type".
// NOTE(review): presumably superseded by the metric named RuntimeOperationsKey
// ("runtime_operations_total") — confirm before removing.
DeprecatedRuntimeOperations = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedRuntimeOperationsKey,
Help: "Cumulative number of runtime operations by operation type.",
},
[]string{"operation_type"},
)
// DeprecatedRuntimeOperationsLatency is a summary vector in the kubelet
// subsystem, exported under the legacy name
// DeprecatedRuntimeOperationsLatencyKey
// ("runtime_operations_latency_microseconds") and labeled by "operation_type".
// Per its help text the observed values are microseconds.
// NOTE(review): presumably superseded by the metric named
// RuntimeOperationsLatencyKey (seconds) — confirm before removing.
DeprecatedRuntimeOperationsLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedRuntimeOperationsLatencyKey,
Help: "Latency in microseconds of runtime operations. Broken down by operation type.",
},
[]string{"operation_type"},
)
// DeprecatedRuntimeOperationsErrors is a counter vector in the kubelet
// subsystem, exported under the legacy name DeprecatedRuntimeOperationsErrorsKey
// ("runtime_operations_errors", without the "_total" suffix) and labeled by
// "operation_type".
// NOTE(review): presumably superseded by the metric named
// RuntimeOperationsErrorsKey ("runtime_operations_errors_total") — confirm
// before removing.
DeprecatedRuntimeOperationsErrors = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedRuntimeOperationsErrorsKey,
Help: "Cumulative number of runtime operation errors by operation type.",
},
[]string{"operation_type"},
)
// DeprecatedEvictionStatsAge is a summary vector in the kubelet subsystem,
// exported under the legacy name DeprecatedEvictionStatsAgeKey
// ("eviction_stats_age_microseconds") and labeled by "eviction_signal".
// The help text does not state a unit; the metric name implies microseconds.
// NOTE(review): presumably superseded by the metric named EvictionStatsAgeKey
// (seconds) — confirm before removing.
DeprecatedEvictionStatsAge = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedEvictionStatsAgeKey,
Help: "Time between when stats are collected, and when pod is evicted based on those stats by eviction signal",
},
[]string{"eviction_signal"},
)
// DeprecatedDevicePluginRegistrationCount is a counter vector in the kubelet
// subsystem, exported under the legacy name
// DeprecatedDevicePluginRegistrationCountKey
// ("device_plugin_registration_count") and labeled by "resource_name".
// NOTE(review): presumably superseded by the metric named
// DevicePluginRegistrationCountKey ("device_plugin_registration_total") —
// confirm before removing.
DeprecatedDevicePluginRegistrationCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedDevicePluginRegistrationCountKey,
Help: "Cumulative number of device plugin registrations. Broken down by resource name.",
},
[]string{"resource_name"},
)
DeprecatedDevicePluginAllocationLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedDevicePluginAllocationLatencyKey,
Help: "Latency in microseconds to serve a device plugin Allocation request. Broken down by resource name.",
},
[]string{"resource_name"},
@@ -263,6 +368,18 @@ func Register(containerCache kubecontainer.RuntimeCache, collectors ...prometheu
prometheus.MustRegister(EvictionStatsAge)
prometheus.MustRegister(DevicePluginRegistrationCount)
prometheus.MustRegister(DevicePluginAllocationLatency)
prometheus.MustRegister(DeprecatedPodWorkerLatency)
prometheus.MustRegister(DeprecatedPodStartLatency)
prometheus.MustRegister(DeprecatedCgroupManagerLatency)
prometheus.MustRegister(DeprecatedPodWorkerStartLatency)
prometheus.MustRegister(DeprecatedPLEGRelistLatency)
prometheus.MustRegister(DeprecatedPLEGRelistInterval)
prometheus.MustRegister(DeprecatedRuntimeOperations)
prometheus.MustRegister(DeprecatedRuntimeOperationsLatency)
prometheus.MustRegister(DeprecatedRuntimeOperationsErrors)
prometheus.MustRegister(DeprecatedEvictionStatsAge)
prometheus.MustRegister(DeprecatedDevicePluginRegistrationCount)
prometheus.MustRegister(DeprecatedDevicePluginAllocationLatency)
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {
prometheus.MustRegister(AssignedConfig)
prometheus.MustRegister(ActiveConfig)
@@ -280,6 +397,11 @@ func SinceInMicroseconds(start time.Time) float64 {
return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds())
}
// SinceInSeconds reports how much time has elapsed since start, expressed as
// a floating-point number of seconds. The result is negative when start lies
// in the future.
func SinceInSeconds(start time.Time) float64 {
	elapsed := time.Since(start)
	return elapsed.Seconds()
}
func newPodAndContainerCollector(containerCache kubecontainer.RuntimeCache) *podAndContainerCollector {
return &podAndContainerCollector{
containerCache: containerCache,