mirror of
https://github.com/optim-enterprises-bv/kubernetes.git
synced 2025-11-02 03:08:15 +00:00
Add sysctl api, validation & Docker support
This commit is contained in:
committed by
Dr. Stefan Schimanski
parent
c5e7e5124a
commit
bea189e9c9
@@ -438,6 +438,20 @@ const (
|
||||
// PreferAvoidPodsAnnotationKey represents the key of preferAvoidPods data (json serialized)
|
||||
// in the Annotations of a Node.
|
||||
PreferAvoidPodsAnnotationKey string = "scheduler.alpha.kubernetes.io/preferAvoidPods"
|
||||
|
||||
// SysctlsPodAnnotationKey represents the key of sysctls which are set for the infrastructure
|
||||
// container of a pod. The annotation value is a comma separated list of sysctl_name=value
|
||||
// key-value pairs. Only a limited set of whitelisted and isolated sysctls is supported by
|
||||
// the kubelet. Pods with other sysctls will fail to launch.
|
||||
SysctlsPodAnnotationKey string = "security.alpha.kubernetes.io/sysctls"
|
||||
|
||||
// UnsafeSysctlsPodAnnotationKey represents the key of sysctls which are set for the infrastructure
|
||||
// container of a pod. The annotation value is a comma separated list of sysctl_name=value
|
||||
// key-value pairs. Unsafe sysctls must be explicitly enabled for a kubelet. They are properly
|
||||
// namespaced to a pod or a container, but their isolation is usually unclear or weak. Their use
|
||||
// is at-your-own-risk. Pods that attempt to set an unsafe sysctl that is not enabled for a kubelet
|
||||
// will fail to launch.
|
||||
UnsafeSysctlsPodAnnotationKey string = "security.alpha.kubernetes.io/unsafe-sysctls"
|
||||
)
|
||||
|
||||
// GetAffinityFromPod gets the json serialized affinity data from Pod.Annotations
|
||||
@@ -522,3 +536,51 @@ func GetAvoidPodsFromNodeAnnotations(annotations map[string]string) (AvoidPods,
|
||||
}
|
||||
return avoidPods, nil
|
||||
}
|
||||
|
||||
// SysctlsFromPodAnnotations parses the sysctl annotations into a slice of safe Sysctls
|
||||
// and a slice of unsafe Sysctls. This is only a convenience wrapper around
|
||||
// SysctlsFromPodAnnotation.
|
||||
func SysctlsFromPodAnnotations(a map[string]string) ([]Sysctl, []Sysctl, error) {
|
||||
safe, err := SysctlsFromPodAnnotation(a[SysctlsPodAnnotationKey])
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
unsafe, err := SysctlsFromPodAnnotation(a[UnsafeSysctlsPodAnnotationKey])
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
return safe, unsafe, nil
|
||||
}
|
||||
|
||||
// SysctlsFromPodAnnotation parses an annotation value into a slice of Sysctls.
|
||||
func SysctlsFromPodAnnotation(annotation string) ([]Sysctl, error) {
|
||||
if len(annotation) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
kvs := strings.Split(annotation, ",")
|
||||
sysctls := make([]Sysctl, len(kvs))
|
||||
for i, kv := range kvs {
|
||||
cs := strings.Split(kv, "=")
|
||||
if len(cs) != 2 {
|
||||
return nil, fmt.Errorf("sysctl %q not of the format sysctl_name=value", kv)
|
||||
}
|
||||
sysctls[i].Name = cs[0]
|
||||
sysctls[i].Value = cs[1]
|
||||
}
|
||||
return sysctls, nil
|
||||
}
|
||||
|
||||
// PodAnnotationsFromSysctls creates an annotation value for a slice of Sysctls.
|
||||
func PodAnnotationsFromSysctls(sysctls []Sysctl) string {
|
||||
if len(sysctls) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
kvs := make([]string, len(sysctls))
|
||||
for i := range sysctls {
|
||||
kvs[i] = fmt.Sprintf("%s=%s", sysctls[i].Name, sysctls[i].Value)
|
||||
}
|
||||
return strings.Join(kvs, ",")
|
||||
}
|
||||
|
||||
@@ -391,3 +391,42 @@ func TestGetAvoidPodsFromNode(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSysctlsFromPodAnnotation(t *testing.T) {
|
||||
type Test struct {
|
||||
annotation string
|
||||
expectValue []Sysctl
|
||||
expectErr bool
|
||||
}
|
||||
for i, test := range []Test{
|
||||
{
|
||||
annotation: "",
|
||||
expectValue: nil,
|
||||
},
|
||||
{
|
||||
annotation: "foo.bar",
|
||||
expectErr: true,
|
||||
},
|
||||
{
|
||||
annotation: "foo.bar=42",
|
||||
expectValue: []Sysctl{{Name: "foo.bar", Value: "42"}},
|
||||
},
|
||||
{
|
||||
annotation: "foo.bar=42,",
|
||||
expectErr: true,
|
||||
},
|
||||
{
|
||||
annotation: "foo.bar=42,abc.def=1",
|
||||
expectValue: []Sysctl{{Name: "foo.bar", Value: "42"}, {Name: "abc.def", Value: "1"}},
|
||||
},
|
||||
} {
|
||||
sysctls, err := SysctlsFromPodAnnotation(test.annotation)
|
||||
if test.expectErr && err == nil {
|
||||
t.Errorf("[%v]expected error but got none", i)
|
||||
} else if !test.expectErr && err != nil {
|
||||
t.Errorf("[%v]did not expect error but got: %v", i, err)
|
||||
} else if !reflect.DeepEqual(sysctls, test.expectValue) {
|
||||
t.Errorf("[%v]expect value %v but got %v", i, test.expectValue, sysctls)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1564,6 +1564,14 @@ type PodSpec struct {
|
||||
Subdomain string `json:"subdomain,omitempty"`
|
||||
}
|
||||
|
||||
// Sysctl defines a kernel parameter to be set
|
||||
type Sysctl struct {
|
||||
// Name of a property to set
|
||||
Name string `json:"name"`
|
||||
// Value of a property to set
|
||||
Value string `json:"value"`
|
||||
}
|
||||
|
||||
// PodSecurityContext holds pod-level security attributes and common container settings.
|
||||
// Some fields are also present in container.securityContext. Field values of
|
||||
// container.securityContext take precedence over field values of PodSecurityContext.
|
||||
|
||||
@@ -23,6 +23,7 @@ import (
|
||||
"os"
|
||||
"path"
|
||||
"reflect"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/golang/glog"
|
||||
@@ -132,6 +133,23 @@ func ValidatePodSpecificAnnotations(annotations map[string]string, spec *api.Pod
|
||||
allErrs = append(allErrs, ValidateSeccompPodAnnotations(annotations, fldPath)...)
|
||||
allErrs = append(allErrs, ValidateAppArmorPodAnnotations(annotations, spec, fldPath)...)
|
||||
|
||||
sysctls, err := api.SysctlsFromPodAnnotation(annotations[api.SysctlsPodAnnotationKey])
|
||||
if err != nil {
|
||||
allErrs = append(allErrs, field.Invalid(fldPath.Key(api.SysctlsPodAnnotationKey), annotations[api.SysctlsPodAnnotationKey], err.Error()))
|
||||
} else {
|
||||
allErrs = append(allErrs, validateSysctls(sysctls, fldPath.Key(api.SysctlsPodAnnotationKey))...)
|
||||
}
|
||||
unsafeSysctls, err := api.SysctlsFromPodAnnotation(annotations[api.UnsafeSysctlsPodAnnotationKey])
|
||||
if err != nil {
|
||||
allErrs = append(allErrs, field.Invalid(fldPath.Key(api.UnsafeSysctlsPodAnnotationKey), annotations[api.UnsafeSysctlsPodAnnotationKey], err.Error()))
|
||||
} else {
|
||||
allErrs = append(allErrs, validateSysctls(unsafeSysctls, fldPath.Key(api.UnsafeSysctlsPodAnnotationKey))...)
|
||||
}
|
||||
inBoth := sysctlIntersection(sysctls, unsafeSysctls)
|
||||
if len(inBoth) > 0 {
|
||||
allErrs = append(allErrs, field.Invalid(fldPath.Key(api.UnsafeSysctlsPodAnnotationKey), strings.Join(inBoth, ", "), "can not be safe and unsafe"))
|
||||
}
|
||||
|
||||
return allErrs
|
||||
}
|
||||
|
||||
@@ -2128,6 +2146,40 @@ func podSpecHasContainer(spec *api.PodSpec, containerName string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
const (
|
||||
// a sysctl segment regex, concatenated with dots to form a sysctl name
|
||||
SysctlSegmentFmt string = "[a-z0-9]([-_a-z0-9]*[a-z0-9])?"
|
||||
|
||||
// a sysctl name regex
|
||||
SysctlFmt string = "(" + SysctlSegmentFmt + "\\.)*" + SysctlSegmentFmt
|
||||
|
||||
// the maximal length of a sysctl name
|
||||
SysctlMaxLength int = 253
|
||||
)
|
||||
|
||||
var sysctlRegexp = regexp.MustCompile("^" + SysctlFmt + "$")
|
||||
|
||||
// IsValidSysctlName checks that the given string is a valid sysctl name,
|
||||
// i.e. matches SysctlFmt.
|
||||
func IsValidSysctlName(name string) bool {
|
||||
if len(name) > SysctlMaxLength {
|
||||
return false
|
||||
}
|
||||
return sysctlRegexp.MatchString(name)
|
||||
}
|
||||
|
||||
func validateSysctls(sysctls []api.Sysctl, fldPath *field.Path) field.ErrorList {
|
||||
allErrs := field.ErrorList{}
|
||||
for i, s := range sysctls {
|
||||
if len(s.Name) == 0 {
|
||||
allErrs = append(allErrs, field.Required(fldPath.Index(i).Child("name"), ""))
|
||||
} else if !IsValidSysctlName(s.Name) {
|
||||
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("name"), s.Name, fmt.Sprintf("must have at most %d characters and match regex %s", SysctlMaxLength, SysctlFmt)))
|
||||
}
|
||||
}
|
||||
return allErrs
|
||||
}
|
||||
|
||||
// ValidatePodSecurityContext test that the specified PodSecurityContext has valid data.
|
||||
func ValidatePodSecurityContext(securityContext *api.PodSecurityContext, spec *api.PodSpec, specPath, fldPath *field.Path) field.ErrorList {
|
||||
allErrs := field.ErrorList{}
|
||||
@@ -3475,3 +3527,17 @@ func isValidHostnamesMap(serializedPodHostNames string) bool {
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func sysctlIntersection(a []api.Sysctl, b []api.Sysctl) []string {
|
||||
lookup := make(map[string]struct{}, len(a))
|
||||
result := []string{}
|
||||
for i := range a {
|
||||
lookup[a[i].Name] = struct{}{}
|
||||
}
|
||||
for i := range b {
|
||||
if _, found := lookup[b[i].Name]; found {
|
||||
result = append(result, b[i].Name)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -3538,6 +3538,17 @@ func TestValidatePod(t *testing.T) {
|
||||
},
|
||||
Spec: validPodSpec,
|
||||
},
|
||||
{ // syntactically valid sysctls
|
||||
ObjectMeta: api.ObjectMeta{
|
||||
Name: "123",
|
||||
Namespace: "ns",
|
||||
Annotations: map[string]string{
|
||||
api.SysctlsPodAnnotationKey: "kernel.shmmni=32768,kernel.shmmax=1000000000",
|
||||
api.UnsafeSysctlsPodAnnotationKey: "knet.ipv4.route.min_pmtu=1000",
|
||||
},
|
||||
},
|
||||
Spec: validPodSpec,
|
||||
},
|
||||
}
|
||||
for _, pod := range successCases {
|
||||
if errs := ValidatePod(&pod); len(errs) != 0 {
|
||||
@@ -3987,6 +3998,47 @@ func TestValidatePod(t *testing.T) {
|
||||
},
|
||||
Spec: validPodSpec,
|
||||
},
|
||||
"invalid sysctl annotation": {
|
||||
ObjectMeta: api.ObjectMeta{
|
||||
Name: "123",
|
||||
Namespace: "ns",
|
||||
Annotations: map[string]string{
|
||||
api.SysctlsPodAnnotationKey: "foo:",
|
||||
},
|
||||
},
|
||||
Spec: validPodSpec,
|
||||
},
|
||||
"invalid comma-separated sysctl annotation": {
|
||||
ObjectMeta: api.ObjectMeta{
|
||||
Name: "123",
|
||||
Namespace: "ns",
|
||||
Annotations: map[string]string{
|
||||
api.SysctlsPodAnnotationKey: "kernel.msgmax,",
|
||||
},
|
||||
},
|
||||
Spec: validPodSpec,
|
||||
},
|
||||
"invalid unsafe sysctl annotation": {
|
||||
ObjectMeta: api.ObjectMeta{
|
||||
Name: "123",
|
||||
Namespace: "ns",
|
||||
Annotations: map[string]string{
|
||||
api.SysctlsPodAnnotationKey: "foo:",
|
||||
},
|
||||
},
|
||||
Spec: validPodSpec,
|
||||
},
|
||||
"intersecting safe sysctls and unsafe sysctls annotations": {
|
||||
ObjectMeta: api.ObjectMeta{
|
||||
Name: "123",
|
||||
Namespace: "ns",
|
||||
Annotations: map[string]string{
|
||||
api.SysctlsPodAnnotationKey: "kernel.shmmax=10000000",
|
||||
api.UnsafeSysctlsPodAnnotationKey: "kernel.shmmax=10000000",
|
||||
},
|
||||
},
|
||||
Spec: validPodSpec,
|
||||
},
|
||||
}
|
||||
for k, v := range errorCases {
|
||||
if errs := ValidatePod(&v); len(errs) == 0 {
|
||||
@@ -7826,3 +7878,91 @@ func TestValidateHasLabel(t *testing.T) {
|
||||
t.Errorf("expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsValidSysctlName(t *testing.T) {
|
||||
valid := []string{
|
||||
"a.b.c.d",
|
||||
"a",
|
||||
"a_b",
|
||||
"a-b",
|
||||
"abc",
|
||||
"abc.def",
|
||||
}
|
||||
invalid := []string{
|
||||
"",
|
||||
"*",
|
||||
"ä",
|
||||
"a_",
|
||||
"_",
|
||||
"__",
|
||||
"_a",
|
||||
"_a._b",
|
||||
"-",
|
||||
".",
|
||||
"a.",
|
||||
".a",
|
||||
"a.b.",
|
||||
"a*.b",
|
||||
"a*b",
|
||||
"*a",
|
||||
"a.*",
|
||||
"*",
|
||||
"abc*",
|
||||
"a.abc*",
|
||||
"a.b.*",
|
||||
"Abc",
|
||||
func(n int) string {
|
||||
x := make([]byte, n)
|
||||
for i := range x {
|
||||
x[i] = byte('a')
|
||||
}
|
||||
return string(x)
|
||||
}(256),
|
||||
}
|
||||
for _, s := range valid {
|
||||
if !IsValidSysctlName(s) {
|
||||
t.Errorf("%q expected to be a valid sysctl name", s)
|
||||
}
|
||||
}
|
||||
for _, s := range invalid {
|
||||
if IsValidSysctlName(s) {
|
||||
t.Errorf("%q expected to be an invalid sysctl name", s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateSysctls(t *testing.T) {
|
||||
valid := []string{
|
||||
"net.foo.bar",
|
||||
"kernel.shmmax",
|
||||
}
|
||||
invalid := []string{
|
||||
"i..nvalid",
|
||||
"_invalid",
|
||||
}
|
||||
|
||||
sysctls := make([]api.Sysctl, len(valid))
|
||||
for i, sysctl := range valid {
|
||||
sysctls[i].Name = sysctl
|
||||
}
|
||||
errs := validateSysctls(sysctls, field.NewPath("foo"))
|
||||
if len(errs) != 0 {
|
||||
t.Errorf("unexpected validation errors: %v", errs)
|
||||
}
|
||||
|
||||
sysctls = make([]api.Sysctl, len(invalid))
|
||||
for i, sysctl := range invalid {
|
||||
sysctls[i].Name = sysctl
|
||||
}
|
||||
errs = validateSysctls(sysctls, field.NewPath("foo"))
|
||||
if len(errs) != 2 {
|
||||
t.Errorf("expected 2 validation errors. Got: %v", errs)
|
||||
} else {
|
||||
if got, expected := errs[0].Error(), "foo"; !strings.Contains(got, expected) {
|
||||
t.Errorf("unexpected errors: expected=%q, got=%q", expected, got)
|
||||
}
|
||||
if got, expected := errs[1].Error(), "foo"; !strings.Contains(got, expected) {
|
||||
t.Errorf("unexpected errors: expected=%q, got=%q", expected, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -660,6 +660,19 @@ func (dm *DockerManager) runContainer(
|
||||
SecurityOpt: securityOpts,
|
||||
}
|
||||
|
||||
// Set sysctls if requested
|
||||
sysctls, err := api.SysctlsFromPodAnnotation(pod.Annotations[api.SysctlsPodAnnotationKey])
|
||||
if err != nil {
|
||||
dm.recorder.Eventf(ref, api.EventTypeWarning, events.FailedToCreateContainer, "Failed to create docker container %q of pod %q with error: %v", container.Name, format.Pod(pod), err)
|
||||
return kubecontainer.ContainerID{}, err
|
||||
}
|
||||
if len(sysctls) > 0 {
|
||||
hc.Sysctls = make(map[string]string, len(sysctls))
|
||||
for _, c := range sysctls {
|
||||
hc.Sysctls[c.Name] = c.Value
|
||||
}
|
||||
}
|
||||
|
||||
// If current api version is newer than docker 1.10 requested, set OomScoreAdj to HostConfig
|
||||
result, err := dm.checkDockerAPIVersion(dockerV110APIVersion)
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user