Reaper and scaler for jobs

This commit is contained in:
Maciej Szulik
2015-09-16 17:32:59 +02:00
parent 28585bc699
commit 48775319d9
9 changed files with 473 additions and 66 deletions

View File

@@ -23,6 +23,7 @@ import (
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/errors"
"k8s.io/kubernetes/pkg/apis/experimental"
client "k8s.io/kubernetes/pkg/client/unversioned"
"k8s.io/kubernetes/pkg/util/wait"
)
@@ -70,8 +71,8 @@ func (c ControllerScaleError) Error() string {
c.ActualError, c.ResourceVersion)
}
// Validate ensures that the preconditions match. Returns nil if they are valid, an error otherwise
func (precondition *ScalePrecondition) Validate(controller *api.ReplicationController) error {
// ValidateReplicationController ensures that the preconditions match. Returns nil if they are valid, an error otherwise
func (precondition *ScalePrecondition) ValidateReplicationController(controller *api.ReplicationController) error {
if precondition.Size != -1 && controller.Spec.Replicas != precondition.Size {
return PreconditionError{"replicas", strconv.Itoa(precondition.Size), strconv.Itoa(controller.Spec.Replicas)}
}
@@ -81,6 +82,20 @@ func (precondition *ScalePrecondition) Validate(controller *api.ReplicationContr
return nil
}
// ValidateJob checks the given job against the scale preconditions.
//
// A Size of -1 disables the parallelism check; otherwise the job must have a
// non-nil Spec.Parallelism equal to Size. An empty ResourceVersion disables the
// resource version check; otherwise it must equal the job's ResourceVersion.
// It returns a PreconditionError describing the first mismatch found, or nil
// when every enabled precondition holds.
func (precondition *ScalePrecondition) ValidateJob(job *experimental.Job) error {
	if wantSize := precondition.Size; wantSize != -1 {
		current := job.Spec.Parallelism
		switch {
		case current == nil:
			// An unset parallelism can never satisfy an explicit size requirement.
			return PreconditionError{"parallelism", strconv.Itoa(wantSize), "nil"}
		case *current != wantSize:
			return PreconditionError{"parallelism", strconv.Itoa(wantSize), strconv.Itoa(*current)}
		}
	}
	if wantVersion := precondition.ResourceVersion; wantVersion != "" && wantVersion != job.ResourceVersion {
		return PreconditionError{"resource version", wantVersion, job.ResourceVersion}
	}
	return nil
}
type Scaler interface {
// Scale scales the named resource after checking preconditions. It optionally
// retries in the event of resource version mismatch (if retry is not nil),
@@ -88,19 +103,24 @@ type Scaler interface {
Scale(namespace, name string, newSize uint, preconditions *ScalePrecondition, retry, wait *RetryParams) error
// ScaleSimple does a simple one-shot attempt at scaling - not useful on its own, but
// a necessary building block for Scale
ScaleSimple(namespace, name string, preconditions *ScalePrecondition, newSize uint) (string, error)
ScaleSimple(namespace, name string, preconditions *ScalePrecondition, newSize uint) error
}
func ScalerFor(kind string, c ScalerClient) (Scaler, error) {
func ScalerFor(kind string, c client.Interface) (Scaler, error) {
switch kind {
case "ReplicationController":
return &ReplicationControllerScaler{c}, nil
case "Job":
return &JobScaler{c}, nil
}
return nil, fmt.Errorf("no scaler has been implemented for %q", kind)
}
type ReplicationControllerScaler struct {
c ScalerClient
c client.Interface
}
// JobScaler is the Scaler implementation for jobs. It wraps the client that
// its methods use to read and update Job objects.
type JobScaler struct {
c client.Interface
}
// RetryParams encapsulates the retry parameters used by kubectl's scaler.
@@ -115,7 +135,7 @@ func NewRetryParams(interval, timeout time.Duration) *RetryParams {
// ScaleCondition is a closure around Scale that facilitates retries via util.wait
func ScaleCondition(r Scaler, precondition *ScalePrecondition, namespace, name string, count uint) wait.ConditionFunc {
return func() (bool, error) {
_, err := r.ScaleSimple(namespace, name, precondition, count)
err := r.ScaleSimple(namespace, name, precondition, count)
switch e, _ := err.(ControllerScaleError); err.(type) {
case nil:
return true, nil
@@ -132,26 +152,26 @@ func ScaleCondition(r Scaler, precondition *ScalePrecondition, namespace, name s
}
}
func (scaler *ReplicationControllerScaler) ScaleSimple(namespace, name string, preconditions *ScalePrecondition, newSize uint) (string, error) {
controller, err := scaler.c.GetReplicationController(namespace, name)
func (scaler *ReplicationControllerScaler) ScaleSimple(namespace, name string, preconditions *ScalePrecondition, newSize uint) error {
controller, err := scaler.c.ReplicationControllers(namespace).Get(name)
if err != nil {
return "", ControllerScaleError{ControllerScaleGetFailure, "Unknown", err}
return ControllerScaleError{ControllerScaleGetFailure, "Unknown", err}
}
if preconditions != nil {
if err := preconditions.Validate(controller); err != nil {
return "", err
if err := preconditions.ValidateReplicationController(controller); err != nil {
return err
}
}
controller.Spec.Replicas = int(newSize)
// TODO: do retry on 409 errors here?
if _, err := scaler.c.UpdateReplicationController(namespace, controller); err != nil {
if _, err := scaler.c.ReplicationControllers(namespace).Update(controller); err != nil {
if errors.IsInvalid(err) {
return "", ControllerScaleError{ControllerScaleUpdateInvalidFailure, controller.ResourceVersion, err}
return ControllerScaleError{ControllerScaleUpdateInvalidFailure, controller.ResourceVersion, err}
}
return "", ControllerScaleError{ControllerScaleUpdateFailure, controller.ResourceVersion, err}
return ControllerScaleError{ControllerScaleUpdateFailure, controller.ResourceVersion, err}
}
// TODO: do a better job of printing objects here.
return "scaled", nil
return nil
}
// Scale updates a ReplicationController to a new size, with optional precondition check (if preconditions is not nil),
@@ -170,40 +190,61 @@ func (scaler *ReplicationControllerScaler) Scale(namespace, name string, newSize
return err
}
if waitForReplicas != nil {
rc, err := scaler.c.GetReplicationController(namespace, name)
rc, err := scaler.c.ReplicationControllers(namespace).Get(name)
if err != nil {
return err
}
return wait.Poll(waitForReplicas.Interval, waitForReplicas.Timeout,
scaler.c.ControllerHasDesiredReplicas(rc))
client.ControllerHasDesiredReplicas(scaler.c, rc))
}
return nil
}
// ScalerClient abstracts access to ReplicationControllers.
type ScalerClient interface {
GetReplicationController(namespace, name string) (*api.ReplicationController, error)
UpdateReplicationController(namespace string, rc *api.ReplicationController) (*api.ReplicationController, error)
ControllerHasDesiredReplicas(rc *api.ReplicationController) wait.ConditionFunc
// ScaleSimple makes one attempt at setting the named job's parallelism to newSize.
//
// It reads the job, runs ValidateJob when preconditions is non-nil, writes
// newSize into Spec.Parallelism and submits the update. A failed read is
// reported as a ControllerScaleError with ControllerScaleGetFailure and the
// resource version "Unknown". A failed update is reported as a
// ControllerScaleError carrying the job's resource version, using
// ControllerScaleUpdateInvalidFailure when errors.IsInvalid matches and
// ControllerScaleUpdateFailure otherwise. Precondition errors are returned as-is.
func (scaler *JobScaler) ScaleSimple(namespace, name string, preconditions *ScalePrecondition, newSize uint) error {
	current, getErr := scaler.c.Experimental().Jobs(namespace).Get(name)
	if getErr != nil {
		return ControllerScaleError{ControllerScaleGetFailure, "Unknown", getErr}
	}

	if preconditions != nil {
		if err := preconditions.ValidateJob(current); err != nil {
			return err
		}
	}

	// Spec.Parallelism is a pointer, so the new value needs an addressable local.
	desired := int(newSize)
	current.Spec.Parallelism = &desired

	_, updateErr := scaler.c.Experimental().Jobs(namespace).Update(current)
	switch {
	case updateErr == nil:
		return nil
	case errors.IsInvalid(updateErr):
		return ControllerScaleError{ControllerScaleUpdateInvalidFailure, current.ResourceVersion, updateErr}
	default:
		return ControllerScaleError{ControllerScaleUpdateFailure, current.ResourceVersion, updateErr}
	}
}
func NewScalerClient(c client.Interface) ScalerClient {
return &realScalerClient{c}
}
// realScalerClient is a ScalerClient which uses a Kube client.
type realScalerClient struct {
client client.Interface
}
func (c *realScalerClient) GetReplicationController(namespace, name string) (*api.ReplicationController, error) {
return c.client.ReplicationControllers(namespace).Get(name)
}
func (c *realScalerClient) UpdateReplicationController(namespace string, rc *api.ReplicationController) (*api.ReplicationController, error) {
return c.client.ReplicationControllers(namespace).Update(rc)
}
func (c *realScalerClient) ControllerHasDesiredReplicas(rc *api.ReplicationController) wait.ConditionFunc {
return client.ControllerHasDesiredReplicas(c.client, rc)
// Scale sets the named job's parallelism to newSize.
//
// A nil preconditions is replaced by one that checks nothing (size -1, empty
// resource version). A nil retry is replaced by a one-millisecond interval and
// timeout, so the update is effectively tried once. The update itself is
// driven by polling ScaleCondition. When waitForReplicas is non-nil, the job is
// read again and the call then polls client.JobHasDesiredParallelism with the
// given interval and timeout before returning.
// NOTE(review): the original comment says the parallelism waited for can be
// lower than requested depending on the job's progress; that logic lives in
// JobHasDesiredParallelism and is not visible here.
func (scaler *JobScaler) Scale(namespace, name string, newSize uint, preconditions *ScalePrecondition, retry, waitForReplicas *RetryParams) error {
	if preconditions == nil {
		preconditions = &ScalePrecondition{Size: -1, ResourceVersion: ""}
	}
	if retry == nil {
		// Make it try only once, immediately
		retry = &RetryParams{Interval: time.Millisecond, Timeout: time.Millisecond}
	}

	attempt := ScaleCondition(scaler, preconditions, namespace, name, newSize)
	if scaleErr := wait.Poll(retry.Interval, retry.Timeout, attempt); scaleErr != nil {
		return scaleErr
	}

	// Without wait parameters the caller does not want to block on progress.
	if waitForReplicas == nil {
		return nil
	}

	updated, getErr := scaler.c.Experimental().Jobs(namespace).Get(name)
	if getErr != nil {
		return getErr
	}
	reached := client.JobHasDesiredParallelism(scaler.c, updated)
	return wait.Poll(waitForReplicas.Interval, waitForReplicas.Timeout, reached)
}