Merge pull request #40932 from peay/cronjob-max-finished-jobs

Automatic merge from submit-queue (batch tested with PRs 40932, 41896, 41815, 41309, 41628)

Modify CronJob API to add job history limits, cleanup jobs in controller

**What this PR does / why we need it**:
As discussed in #34710, this adds two limits to `CronJobSpec` that bound the number of finished jobs (successful and failed, separately) that a CronJob keeps.
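
Both limits are optional `*int32` fields, so existing CronJobs keep all finished jobs unless a limit is set explicitly. A sketch of the addition (the field names match the controller diff below; the JSON tags are assumed from the usual convention):

```go
type CronJobSpec struct {
	// ... existing fields (Schedule, Suspend, JobTemplate, ...) ...

	// SuccessfulJobsHistoryLimit is the number of finished successful Jobs
	// to retain. nil means no limit is applied; 0 retains none.
	SuccessfulJobsHistoryLimit *int32 `json:"successfulJobsHistoryLimit,omitempty"`

	// FailedJobsHistoryLimit is the number of finished failed Jobs to retain.
	FailedJobsHistoryLimit *int32 `json:"failedJobsHistoryLimit,omitempty"`
}
```

Since the fields are pointers, callers set them through an addressable value, e.g. `two := int32(2); spec.SuccessfulJobsHistoryLimit = &two`.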

**Which issue this PR fixes**: fixes #34710

**Special notes for your reviewer**:

cc @soltysh, please have a look and let me know what you think -- I'll then add end to end testing and update the doc in a separate commit. What is the timeline to get this into 1.6?

The plan:

- [x] API changes
  - [x] Changing versioned APIs
    - [x] `types.go`
    - [x] `defaults.go` (nothing to do)
    - [x] `conversion.go` (nothing to do?)
    - [x] `conversion_test.go` (nothing to do?)
  - [x] Changing the internal structure
    - [x] `types.go`
    - [x] `validation.go`
    - [x] `validation_test.go`
  - [x] Edit version conversions
    - [x] Edit (nothing to do?)
    - [x] Run `hack/update-codegen.sh`
  - [x] Generate protobuf objects
    - [x] Run `hack/update-generated-protobuf.sh`
  - [x] Generate json (un)marshaling code
    - [x] Run `hack/update-codecgen.sh`
  - [x] Update fuzzer
- [x] Actual logic
- [x] Unit tests
- [x] End to end tests
- [x] Documentation changes and API specs update in separate commit


**Release note**:

```release-note
Add configurable limits to CronJob resource to specify how many successful and failed jobs are preserved.
```
Authored by Kubernetes Submit Queue, committed by GitHub on 2017-02-26 08:09:54 -08:00.
18 changed files with 965 additions and 208 deletions.

@@ -30,6 +30,7 @@ Just periodically list jobs and SJs, and then reconcile them.
import (
"fmt"
"sort"
"time"
"github.com/golang/glog"
@@ -92,13 +93,13 @@ func (jm *CronJobController) Run(stopCh <-chan struct{}) {
defer utilruntime.HandleCrash()
glog.Infof("Starting CronJob Manager")
// Check things every 10 seconds.
go wait.Until(jm.SyncAll, 10*time.Second, stopCh)
go wait.Until(jm.syncAll, 10*time.Second, stopCh)
<-stopCh
glog.Infof("Shutting down CronJob Manager")
}
// SyncAll lists all the CronJobs and Jobs and reconciles them.
func (jm *CronJobController) SyncAll() {
// syncAll lists all the CronJobs and Jobs and reconciles them.
func (jm *CronJobController) syncAll() {
sjl, err := jm.kubeClient.BatchV2alpha1().CronJobs(metav1.NamespaceAll).List(metav1.ListOptions{})
if err != nil {
glog.Errorf("Error listing cronjobs: %v", err)
@@ -119,24 +120,86 @@ func (jm *CronJobController) SyncAll() {
glog.V(4).Infof("Found %d groups", len(jobsBySj))
for _, sj := range sjs {
SyncOne(sj, jobsBySj[sj.UID], time.Now(), jm.jobControl, jm.sjControl, jm.podControl, jm.recorder)
syncOne(&sj, jobsBySj[sj.UID], time.Now(), jm.jobControl, jm.sjControl, jm.podControl, jm.recorder)
cleanupFinishedJobs(&sj, jobsBySj[sj.UID], jm.jobControl, jm.sjControl, jm.podControl, jm.recorder)
}
}
// SyncOne reconciles a CronJob with a list of any Jobs that it created.
// cleanupFinishedJobs cleans up the finished jobs created by a CronJob
func cleanupFinishedJobs(sj *batch.CronJob, js []batch.Job, jc jobControlInterface, sjc sjControlInterface, pc podControlInterface, recorder record.EventRecorder) {
// If neither limit is set, there is no need to do anything.
if sj.Spec.FailedJobsHistoryLimit == nil && sj.Spec.SuccessfulJobsHistoryLimit == nil {
return
}
failedJobs := []batch.Job{}
successfulJobs := []batch.Job{}
for _, job := range js {
isFinished, finishedStatus := getFinishedStatus(&job)
if isFinished && finishedStatus == batch.JobComplete {
successfulJobs = append(successfulJobs, job)
} else if isFinished && finishedStatus == batch.JobFailed {
failedJobs = append(failedJobs, job)
}
}
if sj.Spec.SuccessfulJobsHistoryLimit != nil {
removeOldestJobs(sj,
successfulJobs,
jc,
pc,
*sj.Spec.SuccessfulJobsHistoryLimit,
recorder)
}
if sj.Spec.FailedJobsHistoryLimit != nil {
removeOldestJobs(sj,
failedJobs,
jc,
pc,
*sj.Spec.FailedJobsHistoryLimit,
recorder)
}
// Update the CronJob, in case jobs were removed from the list.
if _, err := sjc.UpdateStatus(sj); err != nil {
nameForLog := fmt.Sprintf("%s/%s", sj.Namespace, sj.Name)
glog.Infof("Unable to update status for %s (rv = %s): %v", nameForLog, sj.ResourceVersion, err)
}
}
// removeOldestJobs deletes the oldest jobs in js so that at most maxJobs remain
func removeOldestJobs(sj *batch.CronJob, js []batch.Job, jc jobControlInterface, pc podControlInterface, maxJobs int32, recorder record.EventRecorder) {
numToDelete := len(js) - int(maxJobs)
if numToDelete <= 0 {
return
}
nameForLog := fmt.Sprintf("%s/%s", sj.Namespace, sj.Name)
glog.V(4).Infof("Cleaning up %d/%d jobs from %s", numToDelete, len(js), nameForLog)
sort.Sort(byJobStartTime(js))
for i := 0; i < numToDelete; i++ {
glog.V(4).Infof("Removing job %s from %s", js[i].Name, nameForLog)
deleteJob(sj, &js[i], jc, pc, recorder, "history limit reached")
}
}
// syncOne reconciles a CronJob with a list of any Jobs that it created.
// All known jobs created by "sj" should be included in "js".
// The current time is passed in to facilitate testing.
// It has no receiver, to facilitate testing.
func SyncOne(sj batch.CronJob, js []batch.Job, now time.Time, jc jobControlInterface, sjc sjControlInterface, pc podControlInterface, recorder record.EventRecorder) {
func syncOne(sj *batch.CronJob, js []batch.Job, now time.Time, jc jobControlInterface, sjc sjControlInterface, pc podControlInterface, recorder record.EventRecorder) {
nameForLog := fmt.Sprintf("%s/%s", sj.Namespace, sj.Name)
childrenJobs := make(map[types.UID]bool)
for i := range js {
j := js[i]
childrenJobs[j.ObjectMeta.UID] = true
found := inActiveList(sj, j.ObjectMeta.UID)
found := inActiveList(*sj, j.ObjectMeta.UID)
if !found && !IsJobFinished(&j) {
recorder.Eventf(&sj, v1.EventTypeWarning, "UnexpectedJob", "Saw a job that the controller did not create or forgot: %v", j.Name)
recorder.Eventf(sj, v1.EventTypeWarning, "UnexpectedJob", "Saw a job that the controller did not create or forgot: %v", j.Name)
// We found an unfinished job that has us as the parent, but it is not in our Active list.
// This could happen if we crashed right after creating the Job and before updating the status,
// or if our jobs list is newer than our sj status after a relist, or if someone intentionally created
@@ -148,9 +211,9 @@ func SyncOne(sj batch.CronJob, js []batch.Job, now time.Time, jc jobControlInter
// in the same namespace "adopt" that job. ReplicaSets and their Pods work the same way.
// TBS: how to update sj.Status.LastScheduleTime if the adopted job is newer than any we knew about?
} else if found && IsJobFinished(&j) {
deleteFromActiveList(&sj, j.ObjectMeta.UID)
deleteFromActiveList(sj, j.ObjectMeta.UID)
// TODO: event to call out failure vs success.
recorder.Eventf(&sj, v1.EventTypeNormal, "SawCompletedJob", "Saw completed job: %v", j.Name)
recorder.Eventf(sj, v1.EventTypeNormal, "SawCompletedJob", "Saw completed job: %v", j.Name)
}
}
@@ -159,25 +222,25 @@ func SyncOne(sj batch.CronJob, js []batch.Job, now time.Time, jc jobControlInter
// job running.
for _, j := range sj.Status.Active {
if found := childrenJobs[j.UID]; !found {
recorder.Eventf(&sj, v1.EventTypeNormal, "MissingJob", "Active job went missing: %v", j.Name)
deleteFromActiveList(&sj, j.UID)
recorder.Eventf(sj, v1.EventTypeNormal, "MissingJob", "Active job went missing: %v", j.Name)
deleteFromActiveList(sj, j.UID)
}
}
updatedSJ, err := sjc.UpdateStatus(&sj)
updatedSJ, err := sjc.UpdateStatus(sj)
if err != nil {
glog.Errorf("Unable to update status for %s (rv = %s): %v", nameForLog, sj.ResourceVersion, err)
return
}
sj = *updatedSJ
*sj = *updatedSJ
if sj.Spec.Suspend != nil && *sj.Spec.Suspend {
glog.V(4).Infof("Not starting job for %s because it is suspended", nameForLog)
return
}
times, err := getRecentUnmetScheduleTimes(sj, now)
times, err := getRecentUnmetScheduleTimes(*sj, now)
if err != nil {
recorder.Eventf(&sj, v1.EventTypeWarning, "FailedNeedsStart", "Cannot determine if job needs to be started: %v", err)
recorder.Eventf(sj, v1.EventTypeWarning, "FailedNeedsStart", "Cannot determine if job needs to be started: %v", err)
glog.Errorf("Cannot determine if %s needs to be started: %v", nameForLog, err)
}
// TODO: handle multiple unmet start times, from oldest to newest, updating status as needed.
@@ -224,73 +287,37 @@ func SyncOne(sj batch.CronJob, js []batch.Job, now time.Time, jc jobControlInter
// TODO: this should be replaced with server side job deletion
// currently this mimics JobReaper from pkg/kubectl/stop.go
glog.V(4).Infof("Deleting job %s of %s that was still running at next scheduled start time", j.Name, nameForLog)
job, err := jc.GetJob(j.Namespace, j.Name)
if err != nil {
recorder.Eventf(&sj, v1.EventTypeWarning, "FailedGet", "Get job: %v", err)
recorder.Eventf(sj, v1.EventTypeWarning, "FailedGet", "Get job: %v", err)
return
}
// scale job down to 0
if *job.Spec.Parallelism != 0 {
zero := int32(0)
job.Spec.Parallelism = &zero
job, err = jc.UpdateJob(job.Namespace, job)
if err != nil {
recorder.Eventf(&sj, v1.EventTypeWarning, "FailedUpdate", "Update job: %v", err)
return
}
}
// remove all pods...
selector, _ := metav1.LabelSelectorAsSelector(job.Spec.Selector)
options := metav1.ListOptions{LabelSelector: selector.String()}
podList, err := pc.ListPods(job.Namespace, options)
if err != nil {
recorder.Eventf(&sj, v1.EventTypeWarning, "FailedList", "List job-pods: %v", err)
}
errList := []error{}
for _, pod := range podList.Items {
glog.V(2).Infof("CronJob controller is deleting Pod %v/%v", pod.Namespace, pod.Name)
if err := pc.DeletePod(pod.Namespace, pod.Name); err != nil {
// ignores the error when the pod isn't found
if !errors.IsNotFound(err) {
errList = append(errList, err)
}
}
}
if len(errList) != 0 {
recorder.Eventf(&sj, v1.EventTypeWarning, "FailedDelete", "Deleted job-pods: %v", utilerrors.NewAggregate(errList))
if !deleteJob(sj, job, jc, pc, recorder, "") {
return
}
// ... the job itself...
if err := jc.DeleteJob(job.Namespace, job.Name); err != nil {
recorder.Eventf(&sj, v1.EventTypeWarning, "FailedDelete", "Deleted job: %v", err)
glog.Errorf("Error deleting job %s from %s: %v", job.Name, nameForLog, err)
return
}
// ... and its reference from active list
deleteFromActiveList(&sj, job.ObjectMeta.UID)
recorder.Eventf(&sj, v1.EventTypeNormal, "SuccessfulDelete", "Deleted job %v", j.Name)
}
}
jobReq, err := getJobFromTemplate(&sj, scheduledTime)
jobReq, err := getJobFromTemplate(sj, scheduledTime)
if err != nil {
glog.Errorf("Unable to make Job from template in %s: %v", nameForLog, err)
return
}
jobResp, err := jc.CreateJob(sj.Namespace, jobReq)
if err != nil {
recorder.Eventf(&sj, v1.EventTypeWarning, "FailedCreate", "Error creating job: %v", err)
recorder.Eventf(sj, v1.EventTypeWarning, "FailedCreate", "Error creating job: %v", err)
return
}
glog.V(4).Infof("Created Job %s for %s", jobResp.Name, nameForLog)
recorder.Eventf(&sj, v1.EventTypeNormal, "SuccessfulCreate", "Created job %v", jobResp.Name)
recorder.Eventf(sj, v1.EventTypeNormal, "SuccessfulCreate", "Created job %v", jobResp.Name)
// ------------------------------------------------------------------ //
// If this process restarts at this point (after posting a job, but
// before updating the status), then we might try to start the job on
// the next time. Actually, if we relist the SJs and Jobs on the next
// iteration of SyncAll, we might not see our own status update, and
// iteration of syncAll, we might not see our own status update, and
// then post one again. So, we need to use the job name as a lock to
// prevent us from making the job twice (name the job with hash of its
// scheduled time).
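// Illustration of that lock (the name derivation lives in getJobFromTemplate,
// which is not part of this diff; the exact expression is assumed here):
//   jobReq.Name = fmt.Sprintf("%s-%d", sj.Name, scheduledTime.Unix())
// A second CreateJob for the same scheduled time then fails with
// AlreadyExists instead of starting a duplicate Job.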
@@ -303,13 +330,64 @@ func SyncOne(sj batch.CronJob, js []batch.Job, now time.Time, jc jobControlInter
sj.Status.Active = append(sj.Status.Active, *ref)
}
sj.Status.LastScheduleTime = &metav1.Time{Time: scheduledTime}
if _, err := sjc.UpdateStatus(&sj); err != nil {
if _, err := sjc.UpdateStatus(sj); err != nil {
glog.Infof("Unable to update status for %s (rv = %s): %v", nameForLog, sj.ResourceVersion, err)
}
return
}
// deleteJob reaps a job, deleting the job, its pods, and its reference in the active list
func deleteJob(sj *batch.CronJob, job *batch.Job, jc jobControlInterface, pc podControlInterface, recorder record.EventRecorder, reason string) bool {
// TODO: this should be replaced with server side job deletion
// currently this mimics JobReaper from pkg/kubectl/stop.go
nameForLog := fmt.Sprintf("%s/%s", sj.Namespace, sj.Name)
var err error
// scale job down to 0
if *job.Spec.Parallelism != 0 {
zero := int32(0)
job.Spec.Parallelism = &zero
job, err = jc.UpdateJob(job.Namespace, job)
if err != nil {
recorder.Eventf(sj, v1.EventTypeWarning, "FailedUpdate", "Update job: %v", err)
return false
}
}
// remove all pods...
selector, _ := metav1.LabelSelectorAsSelector(job.Spec.Selector)
options := metav1.ListOptions{LabelSelector: selector.String()}
podList, err := pc.ListPods(job.Namespace, options)
if err != nil {
recorder.Eventf(sj, v1.EventTypeWarning, "FailedList", "List job-pods: %v", err)
}
errList := []error{}
for _, pod := range podList.Items {
glog.V(2).Infof("CronJob controller is deleting Pod %v/%v", pod.Namespace, pod.Name)
if err := pc.DeletePod(pod.Namespace, pod.Name); err != nil {
// ignores the error when the pod isn't found
if !errors.IsNotFound(err) {
errList = append(errList, err)
}
}
}
if len(errList) != 0 {
recorder.Eventf(sj, v1.EventTypeWarning, "FailedDelete", "Deleted job-pods: %v", utilerrors.NewAggregate(errList))
return false
}
// ... the job itself...
if err := jc.DeleteJob(job.Namespace, job.Name); err != nil {
recorder.Eventf(sj, v1.EventTypeWarning, "FailedDelete", "Deleted job: %v", err)
glog.Errorf("Error deleting job %s from %s: %v", job.Name, nameForLog, err)
return false
}
// ... and its reference from active list
deleteFromActiveList(sj, job.ObjectMeta.UID)
recorder.Eventf(sj, v1.EventTypeNormal, "SuccessfulDelete", "Deleted job %v", job.Name)
return true
}
func getRef(object runtime.Object) (*v1.ObjectReference, error) {
return v1.GetReference(api.Scheme, object)
}
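
Taken together, `cleanupFinishedJobs` and `removeOldestJobs` implement a small retention policy: partition finished jobs by outcome, sort each partition oldest-first, and delete from the front until the limit holds. A self-contained sketch of the same pattern (local stand-in types and hypothetical names, not the controller's API):

```go
package main

import (
	"fmt"
	"sort"
	"time"
)

// job is a local stand-in for batch.Job, carrying only what the trim needs.
type job struct {
	name    string
	started *time.Time // nil = never started; sorts last, as in byJobStartTime
}

// namesToDelete mirrors removeOldestJobs: sort oldest-first (names break
// ties) and report the entries beyond the history limit.
func namesToDelete(jobs []job, limit int) []string {
	numToDelete := len(jobs) - limit
	if numToDelete <= 0 {
		return nil
	}
	sort.Slice(jobs, func(i, j int) bool {
		switch {
		case jobs[j].started == nil:
			return jobs[i].started != nil
		case jobs[i].started == nil:
			return false
		case jobs[i].started.Equal(*jobs[j].started):
			return jobs[i].name < jobs[j].name
		default:
			return jobs[i].started.Before(*jobs[j].started)
		}
	})
	var out []string
	for _, j := range jobs[:numToDelete] {
		out = append(out, j.name)
	}
	return out
}

func main() {
	at := func(h int) *time.Time {
		t := time.Date(2016, 5, 19, h, 0, 0, 0, time.UTC)
		return &t
	}
	succeeded := []job{{"pi-0400", at(4)}, {"pi-0500", at(5)}, {"pi-0600", at(6)}}
	// With a history limit of 1, the two oldest jobs go.
	fmt.Println(namesToDelete(succeeded, 1)) // [pi-0400 pi-0500]
}
```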

@@ -17,6 +17,8 @@ limitations under the License.
package cronjob
import (
"sort"
"strconv"
"strings"
"testing"
"time"
@@ -81,6 +83,14 @@ func justAfterThePriorHour() time.Time {
return T1
}
func startTimeStringToTime(startTime string) time.Time {
T1, err := time.Parse(time.RFC3339, startTime)
if err != nil {
panic("test setup error")
}
return T1
}
// returns a cronJob with some fields filled in.
func cronJob() batch.CronJob {
return batch.CronJob{
@@ -270,7 +280,7 @@ func TestSyncOne_RunOrNot(t *testing.T) {
pc := &fakePodControl{}
recorder := record.NewFakeRecorder(10)
SyncOne(sj, js, tc.now, jc, sjc, pc, recorder)
syncOne(&sj, js, tc.now, jc, sjc, pc, recorder)
expectedCreates := 0
if tc.expectCreate {
expectedCreates = 1
@@ -320,10 +330,237 @@ func TestSyncOne_RunOrNot(t *testing.T) {
}
}
type CleanupJobSpec struct {
StartTime string
IsFinished bool
IsSuccessful bool
ExpectDelete bool
IsStillInActiveList bool // only when IsFinished is set
}
func TestCleanupFinishedJobs_DeleteOrNot(t *testing.T) {
limitThree := int32(3)
limitTwo := int32(2)
limitOne := int32(1)
limitZero := int32(0)
// In all test cases, job start times are assumed to be
// listed in increasing order.
testCases := map[string]struct {
jobSpecs []CleanupJobSpec
now time.Time
successfulJobsHistoryLimit *int32
failedJobsHistoryLimit *int32
expectActive int
}{
"success. job limit reached": {
[]CleanupJobSpec{
{"2016-05-19T04:00:00Z", T, T, T, F},
{"2016-05-19T05:00:00Z", T, T, T, F},
{"2016-05-19T06:00:00Z", T, T, F, F},
{"2016-05-19T07:00:00Z", T, T, F, F},
{"2016-05-19T08:00:00Z", F, F, F, F},
{"2016-05-19T09:00:00Z", T, F, F, F},
}, justBeforeTheHour(), &limitTwo, &limitOne, 1},
"success. jobs not processed by Sync yet": {
[]CleanupJobSpec{
{"2016-05-19T04:00:00Z", T, T, T, F},
{"2016-05-19T05:00:00Z", T, T, T, T},
{"2016-05-19T06:00:00Z", T, T, F, T},
{"2016-05-19T07:00:00Z", T, T, F, T},
{"2016-05-19T08:00:00Z", F, F, F, F},
{"2016-05-19T09:00:00Z", T, F, F, T},
}, justBeforeTheHour(), &limitTwo, &limitOne, 4},
"failed job limit reached": {
[]CleanupJobSpec{
{"2016-05-19T04:00:00Z", T, F, T, F},
{"2016-05-19T05:00:00Z", T, F, T, F},
{"2016-05-19T06:00:00Z", T, T, F, F},
{"2016-05-19T07:00:00Z", T, T, F, F},
{"2016-05-19T08:00:00Z", T, F, F, F},
{"2016-05-19T09:00:00Z", T, F, F, F},
}, justBeforeTheHour(), &limitTwo, &limitTwo, 0},
"success. job limit set to zero": {
[]CleanupJobSpec{
{"2016-05-19T04:00:00Z", T, T, T, F},
{"2016-05-19T05:00:00Z", T, F, T, F},
{"2016-05-19T06:00:00Z", T, T, T, F},
{"2016-05-19T07:00:00Z", T, T, T, F},
{"2016-05-19T08:00:00Z", F, F, F, F},
{"2016-05-19T09:00:00Z", T, F, F, F},
}, justBeforeTheHour(), &limitZero, &limitOne, 1},
"failed job limit set to zero": {
[]CleanupJobSpec{
{"2016-05-19T04:00:00Z", T, T, F, F},
{"2016-05-19T05:00:00Z", T, F, T, F},
{"2016-05-19T06:00:00Z", T, T, F, F},
{"2016-05-19T07:00:00Z", T, T, F, F},
{"2016-05-19T08:00:00Z", F, F, F, F},
{"2016-05-19T09:00:00Z", T, F, T, F},
}, justBeforeTheHour(), &limitThree, &limitZero, 1},
"no limits reached": {
[]CleanupJobSpec{
{"2016-05-19T04:00:00Z", T, T, F, F},
{"2016-05-19T05:00:00Z", T, F, F, F},
{"2016-05-19T06:00:00Z", T, T, F, F},
{"2016-05-19T07:00:00Z", T, T, F, F},
{"2016-05-19T08:00:00Z", T, F, F, F},
{"2016-05-19T09:00:00Z", T, F, F, F},
}, justBeforeTheHour(), &limitThree, &limitThree, 0},
// This test case should trigger the short-circuit
"limits disabled": {
[]CleanupJobSpec{
{"2016-05-19T04:00:00Z", T, T, F, F},
{"2016-05-19T05:00:00Z", T, F, F, F},
{"2016-05-19T06:00:00Z", T, T, F, F},
{"2016-05-19T07:00:00Z", T, T, F, F},
{"2016-05-19T08:00:00Z", T, F, F, F},
{"2016-05-19T09:00:00Z", T, F, F, F},
}, justBeforeTheHour(), nil, nil, 0},
"success limit disabled": {
[]CleanupJobSpec{
{"2016-05-19T04:00:00Z", T, T, F, F},
{"2016-05-19T05:00:00Z", T, F, F, F},
{"2016-05-19T06:00:00Z", T, T, F, F},
{"2016-05-19T07:00:00Z", T, T, F, F},
{"2016-05-19T08:00:00Z", T, F, F, F},
{"2016-05-19T09:00:00Z", T, F, F, F},
}, justBeforeTheHour(), nil, &limitThree, 0},
"failure limit disabled": {
[]CleanupJobSpec{
{"2016-05-19T04:00:00Z", T, T, F, F},
{"2016-05-19T05:00:00Z", T, F, F, F},
{"2016-05-19T06:00:00Z", T, T, F, F},
{"2016-05-19T07:00:00Z", T, T, F, F},
{"2016-05-19T08:00:00Z", T, F, F, F},
{"2016-05-19T09:00:00Z", T, F, F, F},
}, justBeforeTheHour(), &limitThree, nil, 0},
"no limits reached because still active": {
[]CleanupJobSpec{
{"2016-05-19T04:00:00Z", F, F, F, F},
{"2016-05-19T05:00:00Z", F, F, F, F},
{"2016-05-19T06:00:00Z", F, F, F, F},
{"2016-05-19T07:00:00Z", F, F, F, F},
{"2016-05-19T08:00:00Z", F, F, F, F},
{"2016-05-19T09:00:00Z", F, F, F, F},
}, justBeforeTheHour(), &limitZero, &limitZero, 6},
}
for name, tc := range testCases {
sj := cronJob()
suspend := false
sj.Spec.ConcurrencyPolicy = f
sj.Spec.Suspend = &suspend
sj.Spec.Schedule = onTheHour
sj.Spec.SuccessfulJobsHistoryLimit = tc.successfulJobsHistoryLimit
sj.Spec.FailedJobsHistoryLimit = tc.failedJobsHistoryLimit
var (
job *batch.Job
err error
)
// Set consistent timestamps for the CronJob
if len(tc.jobSpecs) != 0 {
firstTime := startTimeStringToTime(tc.jobSpecs[0].StartTime)
lastTime := startTimeStringToTime(tc.jobSpecs[len(tc.jobSpecs)-1].StartTime)
sj.ObjectMeta.CreationTimestamp = metav1.Time{Time: firstTime}
sj.Status.LastScheduleTime = &metav1.Time{Time: lastTime}
} else {
sj.ObjectMeta.CreationTimestamp = metav1.Time{Time: justBeforeTheHour()}
}
// Create jobs
js := []batch.Job{}
jobsToDelete := []string{}
sj.Status.Active = []v1.ObjectReference{}
for i, spec := range tc.jobSpecs {
job, err = getJobFromTemplate(&sj, startTimeStringToTime(spec.StartTime))
if err != nil {
t.Fatalf("%s: unexpected error creating a job from template: %v", name, err)
}
job.UID = types.UID(strconv.Itoa(i))
job.Namespace = ""
if spec.IsFinished {
var conditionType batch.JobConditionType
if spec.IsSuccessful {
conditionType = batch.JobComplete
} else {
conditionType = batch.JobFailed
}
condition := batch.JobCondition{Type: conditionType, Status: v1.ConditionTrue}
job.Status.Conditions = append(job.Status.Conditions, condition)
if spec.IsStillInActiveList {
sj.Status.Active = append(sj.Status.Active, v1.ObjectReference{UID: job.UID})
}
} else {
if spec.IsSuccessful || spec.IsStillInActiveList {
t.Errorf("%s: test setup error: this case makes no sense", name)
}
sj.Status.Active = append(sj.Status.Active, v1.ObjectReference{UID: job.UID})
}
js = append(js, *job)
if spec.ExpectDelete {
jobsToDelete = append(jobsToDelete, job.Name)
}
}
jc := &fakeJobControl{Job: job}
pc := &fakePodControl{}
sjc := &fakeSJControl{}
recorder := record.NewFakeRecorder(10)
cleanupFinishedJobs(&sj, js, jc, sjc, pc, recorder)
// Check we have actually deleted the correct jobs
if len(jc.DeleteJobName) != len(jobsToDelete) {
t.Errorf("%s: expected %d job deleted, actually %d", name, len(jobsToDelete), len(jc.DeleteJobName))
} else {
sort.Strings(jobsToDelete)
sort.Strings(jc.DeleteJobName)
for i, expectedJobName := range jobsToDelete {
if expectedJobName != jc.DeleteJobName[i] {
t.Errorf("%s: expected job %s deleted, actually %v -- %v vs %v", name, expectedJobName, jc.DeleteJobName[i], jc.DeleteJobName, jobsToDelete)
}
}
}
// Check for events
expectedEvents := len(jobsToDelete)
if len(recorder.Events) != expectedEvents {
t.Errorf("%s: expected %d event, actually %v", name, expectedEvents, len(recorder.Events))
}
// Check for jobs still in active list
numActive := 0
if len(sjc.Updates) != 0 {
numActive = len(sjc.Updates[len(sjc.Updates)-1].Status.Active)
}
if tc.expectActive != numActive {
t.Errorf("%s: expected Active size %d, got %d", name, tc.expectActive, numActive)
}
}
}
// TODO: simulation where the controller randomly doesn't run, and randomly has errors starting jobs or deleting jobs,
// but over time, all jobs run as expected (assuming Allow and no deadline).
// TestSyncOne_Status tests sj.UpdateStatus in SyncOne
// TestSyncOne_Status tests sj.UpdateStatus in syncOne
func TestSyncOne_Status(t *testing.T) {
finishedJob := newJob("1")
finishedJob.Status.Conditions = append(finishedJob.Status.Conditions, batch.JobCondition{Type: batch.JobComplete, Status: v1.ConditionTrue})
@@ -443,7 +680,7 @@ func TestSyncOne_Status(t *testing.T) {
recorder := record.NewFakeRecorder(10)
// Run the code
SyncOne(sj, jobs, tc.now, jc, sjc, pc, recorder)
syncOne(&sj, jobs, tc.now, jc, sjc, pc, recorder)
// Status update happens once when ranging through the job list, and once more if a job is created.
expectUpdates := 1

@@ -234,11 +234,34 @@ func makeCreatedByRefJson(object runtime.Object) (string, error) {
return string(createdByRefJson), nil
}
func IsJobFinished(j *batch.Job) bool {
func getFinishedStatus(j *batch.Job) (bool, batch.JobConditionType) {
for _, c := range j.Status.Conditions {
if (c.Type == batch.JobComplete || c.Type == batch.JobFailed) && c.Status == v1.ConditionTrue {
return true
return true, c.Type
}
}
return false
return false, ""
}
func IsJobFinished(j *batch.Job) bool {
isFinished, _ := getFinishedStatus(j)
return isFinished
}
// byJobStartTime sorts a list of jobs by start timestamp, using their names as a tie breaker.
type byJobStartTime []batch.Job
func (o byJobStartTime) Len() int { return len(o) }
func (o byJobStartTime) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o byJobStartTime) Less(i, j int) bool {
if o[j].Status.StartTime == nil {
return o[i].Status.StartTime != nil
}
// guard the dereferences below: a job with no start time sorts last
if o[i].Status.StartTime == nil {
return false
}
if (*o[i].Status.StartTime).Equal(*o[j].Status.StartTime) {
return o[i].Name < o[j].Name
}
return (*o[i].Status.StartTime).Before(*o[j].Status.StartTime)
}
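
`getFinishedStatus` generalizes `IsJobFinished`: a Job counts as finished once a `Complete` or `Failed` condition is `True`, and the condition type tells `cleanupFinishedJobs` which history limit applies. A standalone sketch of the same check (local stand-in types, not the Kubernetes API):

```go
package main

import "fmt"

type jobConditionType string

const (
	jobComplete jobConditionType = "Complete"
	jobFailed   jobConditionType = "Failed"
)

type jobCondition struct {
	condType jobConditionType
	status   string // "True", "False", or "Unknown"
}

// finishedStatus mirrors getFinishedStatus: a job is finished iff some
// terminal condition is True; the type says whether it succeeded or failed.
func finishedStatus(conditions []jobCondition) (bool, jobConditionType) {
	for _, c := range conditions {
		if (c.condType == jobComplete || c.condType == jobFailed) && c.status == "True" {
			return true, c.condType
		}
	}
	return false, ""
}

func main() {
	fmt.Println(finishedStatus([]jobCondition{{jobFailed, "True"}}))    // true Failed
	fmt.Println(finishedStatus([]jobCondition{{jobComplete, "False"}})) // false
}
```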