mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-04 04:08:16 +00:00 
			
		
		
		
	Add metric job_pod_finished
To count the number of pods that the job controller successfully tracked with the JobTrackingWithFinalizers feature gate.
This commit is contained in:
		@@ -338,3 +338,10 @@ func (bci byCompletionIndex) Swap(i, j int) {
 | 
				
			|||||||
func (bci byCompletionIndex) Len() int {
 | 
					func (bci byCompletionIndex) Len() int {
 | 
				
			||||||
	return len(bci)
 | 
						return len(bci)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func completionModeStr(job *batch.Job) string {
 | 
				
			||||||
 | 
						if job.Spec.CompletionMode != nil {
 | 
				
			||||||
 | 
							return string(*job.Spec.CompletionMode)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return string(batch.NonIndexedCompletion)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -897,6 +897,8 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(job *batch.Job, pods []*
 | 
				
			|||||||
			uidsWithFinalizer.Insert(string(p.UID))
 | 
								uidsWithFinalizer.Insert(string(p.UID))
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						// Shallow copy, as it will only be used to detect changes in the counters.
 | 
				
			||||||
 | 
						oldCounters := job.Status
 | 
				
			||||||
	if cleanUncountedPodsWithoutFinalizers(&job.Status, uidsWithFinalizer) {
 | 
						if cleanUncountedPodsWithoutFinalizers(&job.Status, uidsWithFinalizer) {
 | 
				
			||||||
		needsFlush = true
 | 
							needsFlush = true
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
@@ -951,7 +953,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(job *batch.Job, pods []*
 | 
				
			|||||||
		job.Status.CompletedIndexes = succeededIndexes.String()
 | 
							job.Status.CompletedIndexes = succeededIndexes.String()
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	var err error
 | 
						var err error
 | 
				
			||||||
	if needsFlush, err = jm.flushUncountedAndRemoveFinalizers(job, podsToRemoveFinalizer, uidsWithFinalizer, needsFlush); err != nil {
 | 
						if needsFlush, err = jm.flushUncountedAndRemoveFinalizers(job, podsToRemoveFinalizer, uidsWithFinalizer, &oldCounters, needsFlush); err != nil {
 | 
				
			||||||
		return err
 | 
							return err
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if jm.enactJobFinished(job, finishedCond) {
 | 
						if jm.enactJobFinished(job, finishedCond) {
 | 
				
			||||||
@@ -961,6 +963,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(job *batch.Job, pods []*
 | 
				
			|||||||
		if err := jm.updateStatusHandler(job); err != nil {
 | 
							if err := jm.updateStatusHandler(job); err != nil {
 | 
				
			||||||
			return fmt.Errorf("removing uncounted pods from status: %w", err)
 | 
								return fmt.Errorf("removing uncounted pods from status: %w", err)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
							recordJobPodFinished(job, oldCounters)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return nil
 | 
						return nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -974,11 +977,14 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(job *batch.Job, pods []*
 | 
				
			|||||||
// 4. (if not all removals succeeded) flush Job status again.
 | 
					// 4. (if not all removals succeeded) flush Job status again.
 | 
				
			||||||
// Returns whether there are pending changes in the Job status that need to be
 | 
					// Returns whether there are pending changes in the Job status that need to be
 | 
				
			||||||
// flushed in subsequent calls.
 | 
					// flushed in subsequent calls.
 | 
				
			||||||
func (jm *Controller) flushUncountedAndRemoveFinalizers(job *batch.Job, podsToRemoveFinalizer []*v1.Pod, uidsWithFinalizer sets.String, needsFlush bool) (bool, error) {
 | 
					func (jm *Controller) flushUncountedAndRemoveFinalizers(job *batch.Job, podsToRemoveFinalizer []*v1.Pod, uidsWithFinalizer sets.String, oldCounters *batch.JobStatus, needsFlush bool) (bool, error) {
 | 
				
			||||||
	if needsFlush {
 | 
						if needsFlush {
 | 
				
			||||||
		if err := jm.updateStatusHandler(job); err != nil {
 | 
							if err := jm.updateStatusHandler(job); err != nil {
 | 
				
			||||||
			return needsFlush, fmt.Errorf("adding uncounted pods to status: %w", err)
 | 
								return needsFlush, fmt.Errorf("adding uncounted pods to status: %w", err)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
							recordJobPodFinished(job, *oldCounters)
 | 
				
			||||||
 | 
							// Shallow copy.
 | 
				
			||||||
 | 
							*oldCounters = job.Status
 | 
				
			||||||
		needsFlush = false
 | 
							needsFlush = false
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	var rmErr error
 | 
						var rmErr error
 | 
				
			||||||
@@ -1545,3 +1551,11 @@ func ensureJobConditionStatus(list []batch.JobCondition, cType batch.JobConditio
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
	return list, false
 | 
						return list, false
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func recordJobPodFinished(job *batch.Job, oldCounters batch.JobStatus) {
 | 
				
			||||||
 | 
						completionMode := completionModeStr(job)
 | 
				
			||||||
 | 
						diff := job.Status.Succeeded - oldCounters.Succeeded
 | 
				
			||||||
 | 
						metrics.JobPodsFinished.WithLabelValues(completionMode, metrics.Succeeded).Add(float64(diff))
 | 
				
			||||||
 | 
						diff = job.Status.Failed - oldCounters.Failed
 | 
				
			||||||
 | 
						metrics.JobPodsFinished.WithLabelValues(completionMode, metrics.Failed).Add(float64(diff))
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -46,8 +46,10 @@ import (
 | 
				
			|||||||
	"k8s.io/client-go/tools/cache"
 | 
						"k8s.io/client-go/tools/cache"
 | 
				
			||||||
	"k8s.io/client-go/util/workqueue"
 | 
						"k8s.io/client-go/util/workqueue"
 | 
				
			||||||
	featuregatetesting "k8s.io/component-base/featuregate/testing"
 | 
						featuregatetesting "k8s.io/component-base/featuregate/testing"
 | 
				
			||||||
 | 
						metricstestutil "k8s.io/component-base/metrics/testutil"
 | 
				
			||||||
	_ "k8s.io/kubernetes/pkg/apis/core/install"
 | 
						_ "k8s.io/kubernetes/pkg/apis/core/install"
 | 
				
			||||||
	"k8s.io/kubernetes/pkg/controller"
 | 
						"k8s.io/kubernetes/pkg/controller"
 | 
				
			||||||
 | 
						"k8s.io/kubernetes/pkg/controller/job/metrics"
 | 
				
			||||||
	"k8s.io/kubernetes/pkg/controller/testutil"
 | 
						"k8s.io/kubernetes/pkg/controller/testutil"
 | 
				
			||||||
	"k8s.io/kubernetes/pkg/features"
 | 
						"k8s.io/kubernetes/pkg/features"
 | 
				
			||||||
	"k8s.io/utils/pointer"
 | 
						"k8s.io/utils/pointer"
 | 
				
			||||||
@@ -1514,19 +1516,20 @@ func TestTrackJobStatusAndRemoveFinalizers(t *testing.T) {
 | 
				
			|||||||
			clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
 | 
								clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
 | 
				
			||||||
			manager, _ := newControllerFromClient(clientSet, controller.NoResyncPeriodFunc)
 | 
								manager, _ := newControllerFromClient(clientSet, controller.NoResyncPeriodFunc)
 | 
				
			||||||
			fakePodControl := controller.FakePodControl{Err: tc.podControlErr}
 | 
								fakePodControl := controller.FakePodControl{Err: tc.podControlErr}
 | 
				
			||||||
 | 
								metrics.JobPodsFinished.Reset()
 | 
				
			||||||
			manager.podControl = &fakePodControl
 | 
								manager.podControl = &fakePodControl
 | 
				
			||||||
			var statusUpdates []batch.JobStatus
 | 
								var statusUpdates []batch.JobStatus
 | 
				
			||||||
			manager.updateStatusHandler = func(job *batch.Job) error {
 | 
								manager.updateStatusHandler = func(job *batch.Job) error {
 | 
				
			||||||
				statusUpdates = append(statusUpdates, *job.Status.DeepCopy())
 | 
									statusUpdates = append(statusUpdates, *job.Status.DeepCopy())
 | 
				
			||||||
				return tc.statusUpdateErr
 | 
									return tc.statusUpdateErr
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
								job := tc.job.DeepCopy()
 | 
				
			||||||
			if tc.job.Status.UncountedTerminatedPods == nil {
 | 
								if job.Status.UncountedTerminatedPods == nil {
 | 
				
			||||||
				tc.job.Status.UncountedTerminatedPods = &batch.UncountedTerminatedPods{}
 | 
									job.Status.UncountedTerminatedPods = &batch.UncountedTerminatedPods{}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			uncounted := newUncountedTerminatedPods(*tc.job.Status.UncountedTerminatedPods)
 | 
								uncounted := newUncountedTerminatedPods(*job.Status.UncountedTerminatedPods)
 | 
				
			||||||
			succeededIndexes := succeededIndexesFromJob(&tc.job)
 | 
								succeededIndexes := succeededIndexesFromJob(job)
 | 
				
			||||||
			err := manager.trackJobStatusAndRemoveFinalizers(&tc.job, tc.pods, succeededIndexes, *uncounted, tc.finishedCond, tc.needsFlush)
 | 
								err := manager.trackJobStatusAndRemoveFinalizers(job, tc.pods, succeededIndexes, *uncounted, tc.finishedCond, tc.needsFlush)
 | 
				
			||||||
			if !errors.Is(err, tc.wantErr) {
 | 
								if !errors.Is(err, tc.wantErr) {
 | 
				
			||||||
				t.Errorf("Got error %v, want %w", err, tc.wantErr)
 | 
									t.Errorf("Got error %v, want %w", err, tc.wantErr)
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
@@ -1537,6 +1540,25 @@ func TestTrackJobStatusAndRemoveFinalizers(t *testing.T) {
 | 
				
			|||||||
			if rmFinalizers != tc.wantRmFinalizers {
 | 
								if rmFinalizers != tc.wantRmFinalizers {
 | 
				
			||||||
				t.Errorf("Removed %d finalizers, want %d", rmFinalizers, tc.wantRmFinalizers)
 | 
									t.Errorf("Removed %d finalizers, want %d", rmFinalizers, tc.wantRmFinalizers)
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
								if tc.wantErr == nil {
 | 
				
			||||||
 | 
									completionMode := completionModeStr(job)
 | 
				
			||||||
 | 
									v, err := metricstestutil.GetCounterMetricValue(metrics.JobPodsFinished.WithLabelValues(completionMode, metrics.Succeeded))
 | 
				
			||||||
 | 
									if err != nil {
 | 
				
			||||||
 | 
										t.Fatalf("Obtaining succeeded job_pods_finished_total: %v", err)
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
									newSucceeded := job.Status.Succeeded - tc.job.Status.Succeeded
 | 
				
			||||||
 | 
									if float64(newSucceeded) != v {
 | 
				
			||||||
 | 
										t.Errorf("Metric reports %.0f succeeded pods, want %d", v, newSucceeded)
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
									v, err = metricstestutil.GetCounterMetricValue(metrics.JobPodsFinished.WithLabelValues(completionMode, metrics.Failed))
 | 
				
			||||||
 | 
									if err != nil {
 | 
				
			||||||
 | 
										t.Fatalf("Obtaining failed job_pods_finished_total: %v", err)
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
									newFailed := job.Status.Failed - tc.job.Status.Failed
 | 
				
			||||||
 | 
									if float64(newFailed) != v {
 | 
				
			||||||
 | 
										t.Errorf("Metric reports %.0f failed pods, want %d", v, newFailed)
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
		})
 | 
							})
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -68,10 +68,26 @@ var (
 | 
				
			|||||||
		},
 | 
							},
 | 
				
			||||||
		[]string{"completion_mode", "result"},
 | 
							[]string{"completion_mode", "result"},
 | 
				
			||||||
	)
 | 
						)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// JobPodsFinished records the number of finished Pods that the job controller
 | 
				
			||||||
 | 
						// finished tracking.
 | 
				
			||||||
 | 
						// It only applies to Jobs that were created while the feature gate
 | 
				
			||||||
 | 
						// JobTrackingWithFinalizers was enabled.
 | 
				
			||||||
 | 
						// Possible label values:
 | 
				
			||||||
 | 
						//   completion_mode: Indexed, NonIndexed
 | 
				
			||||||
 | 
						//   result:          failed, succeeded
 | 
				
			||||||
 | 
						JobPodsFinished = metrics.NewCounterVec(
 | 
				
			||||||
 | 
							&metrics.CounterOpts{
 | 
				
			||||||
 | 
								Subsystem: JobControllerSubsystem,
 | 
				
			||||||
 | 
								Name:      "job_pods_finished_total",
 | 
				
			||||||
 | 
								Help:      "The number of finished Pods that are fully tracked",
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
							[]string{"completion_mode", "result"})
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Possible values for the "action" label in the above metrics.
 | 
					 | 
				
			||||||
const (
 | 
					const (
 | 
				
			||||||
 | 
						// Possible values for the "action" label in the above metrics.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// JobSyncActionReconciling when the Job's pod creation/deletion expectations
 | 
						// JobSyncActionReconciling when the Job's pod creation/deletion expectations
 | 
				
			||||||
	// are unsatisfied and the controller is waiting for issued Pod
 | 
						// are unsatisfied and the controller is waiting for issued Pod
 | 
				
			||||||
	// creation/deletions to complete.
 | 
						// creation/deletions to complete.
 | 
				
			||||||
@@ -88,6 +104,11 @@ const (
 | 
				
			|||||||
	// if a Job is suspended or if the number of active Pods is more than
 | 
						// if a Job is suspended or if the number of active Pods is more than
 | 
				
			||||||
	// parallelism.
 | 
						// parallelism.
 | 
				
			||||||
	JobSyncActionPodsDeleted = "pods_deleted"
 | 
						JobSyncActionPodsDeleted = "pods_deleted"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// Possible values for "result" label in the above metrics.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						Succeeded = "succeeded"
 | 
				
			||||||
 | 
						Failed    = "failed"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
var registerMetrics sync.Once
 | 
					var registerMetrics sync.Once
 | 
				
			||||||
@@ -98,5 +119,6 @@ func Register() {
 | 
				
			|||||||
		legacyregistry.MustRegister(JobSyncDurationSeconds)
 | 
							legacyregistry.MustRegister(JobSyncDurationSeconds)
 | 
				
			||||||
		legacyregistry.MustRegister(JobSyncNum)
 | 
							legacyregistry.MustRegister(JobSyncNum)
 | 
				
			||||||
		legacyregistry.MustRegister(JobFinishedNum)
 | 
							legacyregistry.MustRegister(JobFinishedNum)
 | 
				
			||||||
 | 
							legacyregistry.MustRegister(JobPodsFinished)
 | 
				
			||||||
	})
 | 
						})
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user