mirror of
https://github.com/optim-enterprises-bv/kubernetes.git
synced 2025-11-03 19:58:17 +00:00
Provide backpressure to clients when etcd goes down
When etcd is down today, we don't specifically handle the error involved, which means clients get a generic 500 error. This commit adds a formal internal error type for both WatchExpired and EtcdUnreachable, and then converts them to api/errors types before returning to the client. It also upgrades the client to retry on any 429 or 5xx error that has a Retry-After header, instead of just 429. In combination, this allows the apiserver to exert backpressure on controllers that are hotlooping. The retry delay is set to 2 seconds by default, but we could potentially ramp that up even further in a future iteration.
This commit is contained in:
@@ -21,12 +21,27 @@ import (
|
||||
etcdstorage "k8s.io/kubernetes/pkg/storage/etcd"
|
||||
)
|
||||
|
||||
// InterpretListError converts a generic etcd error on a retrieval
|
||||
// operation into the appropriate API error.
|
||||
func InterpretListError(err error, kind string) error {
|
||||
switch {
|
||||
case etcdstorage.IsEtcdNotFound(err):
|
||||
return errors.NewNotFound(kind, "")
|
||||
case etcdstorage.IsEtcdUnreachable(err):
|
||||
return errors.NewServerTimeout(kind, "list", 2) // TODO: make configurable or handled at a higher level
|
||||
default:
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// InterpretGetError converts a generic etcd error on a retrieval
|
||||
// operation into the appropriate API error.
|
||||
func InterpretGetError(err error, kind, name string) error {
|
||||
switch {
|
||||
case etcdstorage.IsEtcdNotFound(err):
|
||||
return errors.NewNotFound(kind, name)
|
||||
case etcdstorage.IsEtcdUnreachable(err):
|
||||
return errors.NewServerTimeout(kind, "get", 2) // TODO: make configurable or handled at a higher level
|
||||
default:
|
||||
return err
|
||||
}
|
||||
@@ -38,6 +53,8 @@ func InterpretCreateError(err error, kind, name string) error {
|
||||
switch {
|
||||
case etcdstorage.IsEtcdNodeExist(err):
|
||||
return errors.NewAlreadyExists(kind, name)
|
||||
case etcdstorage.IsEtcdUnreachable(err):
|
||||
return errors.NewServerTimeout(kind, "create", 2) // TODO: make configurable or handled at a higher level
|
||||
default:
|
||||
return err
|
||||
}
|
||||
@@ -49,6 +66,8 @@ func InterpretUpdateError(err error, kind, name string) error {
|
||||
switch {
|
||||
case etcdstorage.IsEtcdTestFailed(err), etcdstorage.IsEtcdNodeExist(err):
|
||||
return errors.NewConflict(kind, name, err)
|
||||
case etcdstorage.IsEtcdUnreachable(err):
|
||||
return errors.NewServerTimeout(kind, "update", 2) // TODO: make configurable or handled at a higher level
|
||||
default:
|
||||
return err
|
||||
}
|
||||
@@ -60,6 +79,8 @@ func InterpretDeleteError(err error, kind, name string) error {
|
||||
switch {
|
||||
case etcdstorage.IsEtcdNotFound(err):
|
||||
return errors.NewNotFound(kind, name)
|
||||
case etcdstorage.IsEtcdUnreachable(err):
|
||||
return errors.NewServerTimeout(kind, "delete", 2) // TODO: make configurable or handled at a higher level
|
||||
default:
|
||||
return err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user