Add sysctl api, validation & Docker support

This commit is contained in:
Dominika Hodovska
2016-06-14 15:09:53 +02:00
committed by Dr. Stefan Schimanski
parent c5e7e5124a
commit bea189e9c9
6 changed files with 328 additions and 0 deletions

View File

@@ -438,6 +438,20 @@ const (
// PreferAvoidPodsAnnotationKey represents the key of preferAvoidPods data (json serialized)
// in the Annotations of a Node.
PreferAvoidPodsAnnotationKey string = "scheduler.alpha.kubernetes.io/preferAvoidPods"
// SysctlsPodAnnotationKey represents the key of sysctls which are set for the infrastructure
// container of a pod. The annotation value is a comma separated list of sysctl_name=value
// key-value pairs. Only a limited set of whitelisted and isolated sysctls is supported by
// the kubelet. Pods with other sysctls will fail to launch.
SysctlsPodAnnotationKey string = "security.alpha.kubernetes.io/sysctls"
// UnsafeSysctlsPodAnnotationKey represents the key of sysctls which are set for the infrastructure
// container of a pod. The annotation value is a comma separated list of sysctl_name=value
// key-value pairs. Unsafe sysctls must be explicitly enabled for a kubelet. They are properly
// namespaced to a pod or a container, but their isolation is usually unclear or weak. Their use
// is at-your-own-risk. Pods that attempt to set an unsafe sysctl that is not enabled for a kubelet
// will fail to launch.
UnsafeSysctlsPodAnnotationKey string = "security.alpha.kubernetes.io/unsafe-sysctls"
)
// GetAffinityFromPod gets the json serialized affinity data from Pod.Annotations
@@ -522,3 +536,51 @@ func GetAvoidPodsFromNodeAnnotations(annotations map[string]string) (AvoidPods,
}
return avoidPods, nil
}
// SysctlsFromPodAnnotations parses the sysctl annotations into a slice of safe Sysctls
// and a slice of unsafe Sysctls. This is only a convenience wrapper around
// SysctlsFromPodAnnotation.
func SysctlsFromPodAnnotations(a map[string]string) ([]Sysctl, []Sysctl, error) {
safe, err := SysctlsFromPodAnnotation(a[SysctlsPodAnnotationKey])
if err != nil {
return nil, nil, err
}
unsafe, err := SysctlsFromPodAnnotation(a[UnsafeSysctlsPodAnnotationKey])
if err != nil {
return nil, nil, err
}
return safe, unsafe, nil
}
// SysctlsFromPodAnnotation parses an annotation value into a slice of Sysctls.
func SysctlsFromPodAnnotation(annotation string) ([]Sysctl, error) {
if len(annotation) == 0 {
return nil, nil
}
kvs := strings.Split(annotation, ",")
sysctls := make([]Sysctl, len(kvs))
for i, kv := range kvs {
cs := strings.Split(kv, "=")
if len(cs) != 2 {
return nil, fmt.Errorf("sysctl %q not of the format sysctl_name=value", kv)
}
sysctls[i].Name = cs[0]
sysctls[i].Value = cs[1]
}
return sysctls, nil
}
// PodAnnotationsFromSysctls creates an annotation value for a slice of Sysctls.
func PodAnnotationsFromSysctls(sysctls []Sysctl) string {
if len(sysctls) == 0 {
return ""
}
kvs := make([]string, len(sysctls))
for i := range sysctls {
kvs[i] = fmt.Sprintf("%s=%s", sysctls[i].Name, sysctls[i].Value)
}
return strings.Join(kvs, ",")
}

View File

@@ -391,3 +391,42 @@ func TestGetAvoidPodsFromNode(t *testing.T) {
}
}
}
func TestSysctlsFromPodAnnotation(t *testing.T) {
type Test struct {
annotation string
expectValue []Sysctl
expectErr bool
}
for i, test := range []Test{
{
annotation: "",
expectValue: nil,
},
{
annotation: "foo.bar",
expectErr: true,
},
{
annotation: "foo.bar=42",
expectValue: []Sysctl{{Name: "foo.bar", Value: "42"}},
},
{
annotation: "foo.bar=42,",
expectErr: true,
},
{
annotation: "foo.bar=42,abc.def=1",
expectValue: []Sysctl{{Name: "foo.bar", Value: "42"}, {Name: "abc.def", Value: "1"}},
},
} {
sysctls, err := SysctlsFromPodAnnotation(test.annotation)
if test.expectErr && err == nil {
t.Errorf("[%v]expected error but got none", i)
} else if !test.expectErr && err != nil {
t.Errorf("[%v]did not expect error but got: %v", i, err)
} else if !reflect.DeepEqual(sysctls, test.expectValue) {
t.Errorf("[%v]expect value %v but got %v", i, test.expectValue, sysctls)
}
}
}

View File

@@ -1564,6 +1564,14 @@ type PodSpec struct {
Subdomain string `json:"subdomain,omitempty"`
}
// Sysctl defines a kernel parameter to be set
type Sysctl struct {
// Name of a property to set
Name string `json:"name"`
// Value of a property to set
Value string `json:"value"`
}
// PodSecurityContext holds pod-level security attributes and common container settings.
// Some fields are also present in container.securityContext. Field values of
// container.securityContext take precedence over field values of PodSecurityContext.

View File

@@ -23,6 +23,7 @@ import (
"os"
"path"
"reflect"
"regexp"
"strings"
"github.com/golang/glog"
@@ -132,6 +133,23 @@ func ValidatePodSpecificAnnotations(annotations map[string]string, spec *api.Pod
allErrs = append(allErrs, ValidateSeccompPodAnnotations(annotations, fldPath)...)
allErrs = append(allErrs, ValidateAppArmorPodAnnotations(annotations, spec, fldPath)...)
sysctls, err := api.SysctlsFromPodAnnotation(annotations[api.SysctlsPodAnnotationKey])
if err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Key(api.SysctlsPodAnnotationKey), annotations[api.SysctlsPodAnnotationKey], err.Error()))
} else {
allErrs = append(allErrs, validateSysctls(sysctls, fldPath.Key(api.SysctlsPodAnnotationKey))...)
}
unsafeSysctls, err := api.SysctlsFromPodAnnotation(annotations[api.UnsafeSysctlsPodAnnotationKey])
if err != nil {
allErrs = append(allErrs, field.Invalid(fldPath.Key(api.UnsafeSysctlsPodAnnotationKey), annotations[api.UnsafeSysctlsPodAnnotationKey], err.Error()))
} else {
allErrs = append(allErrs, validateSysctls(unsafeSysctls, fldPath.Key(api.UnsafeSysctlsPodAnnotationKey))...)
}
inBoth := sysctlIntersection(sysctls, unsafeSysctls)
if len(inBoth) > 0 {
allErrs = append(allErrs, field.Invalid(fldPath.Key(api.UnsafeSysctlsPodAnnotationKey), strings.Join(inBoth, ", "), "can not be safe and unsafe"))
}
return allErrs
}
@@ -2128,6 +2146,40 @@ func podSpecHasContainer(spec *api.PodSpec, containerName string) bool {
return false
}
const (
// a sysctl segment regex, concatenated with dots to form a sysctl name
SysctlSegmentFmt string = "[a-z0-9]([-_a-z0-9]*[a-z0-9])?"
// a sysctl name regex
SysctlFmt string = "(" + SysctlSegmentFmt + "\\.)*" + SysctlSegmentFmt
// the maximal length of a sysctl name
SysctlMaxLength int = 253
)
var sysctlRegexp = regexp.MustCompile("^" + SysctlFmt + "$")
// IsValidSysctlName checks that the given string is a valid sysctl name,
// i.e. matches SysctlFmt.
func IsValidSysctlName(name string) bool {
if len(name) > SysctlMaxLength {
return false
}
return sysctlRegexp.MatchString(name)
}
func validateSysctls(sysctls []api.Sysctl, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
for i, s := range sysctls {
if len(s.Name) == 0 {
allErrs = append(allErrs, field.Required(fldPath.Index(i).Child("name"), ""))
} else if !IsValidSysctlName(s.Name) {
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("name"), s.Name, fmt.Sprintf("must have at most %d characters and match regex %s", SysctlMaxLength, SysctlFmt)))
}
}
return allErrs
}
// ValidatePodSecurityContext test that the specified PodSecurityContext has valid data.
func ValidatePodSecurityContext(securityContext *api.PodSecurityContext, spec *api.PodSpec, specPath, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
@@ -3475,3 +3527,17 @@ func isValidHostnamesMap(serializedPodHostNames string) bool {
}
return true
}
func sysctlIntersection(a []api.Sysctl, b []api.Sysctl) []string {
lookup := make(map[string]struct{}, len(a))
result := []string{}
for i := range a {
lookup[a[i].Name] = struct{}{}
}
for i := range b {
if _, found := lookup[b[i].Name]; found {
result = append(result, b[i].Name)
}
}
return result
}

View File

@@ -3538,6 +3538,17 @@ func TestValidatePod(t *testing.T) {
},
Spec: validPodSpec,
},
{ // syntactically valid sysctls
ObjectMeta: api.ObjectMeta{
Name: "123",
Namespace: "ns",
Annotations: map[string]string{
api.SysctlsPodAnnotationKey: "kernel.shmmni=32768,kernel.shmmax=1000000000",
api.UnsafeSysctlsPodAnnotationKey: "knet.ipv4.route.min_pmtu=1000",
},
},
Spec: validPodSpec,
},
}
for _, pod := range successCases {
if errs := ValidatePod(&pod); len(errs) != 0 {
@@ -3987,6 +3998,47 @@ func TestValidatePod(t *testing.T) {
},
Spec: validPodSpec,
},
"invalid sysctl annotation": {
ObjectMeta: api.ObjectMeta{
Name: "123",
Namespace: "ns",
Annotations: map[string]string{
api.SysctlsPodAnnotationKey: "foo:",
},
},
Spec: validPodSpec,
},
"invalid comma-separated sysctl annotation": {
ObjectMeta: api.ObjectMeta{
Name: "123",
Namespace: "ns",
Annotations: map[string]string{
api.SysctlsPodAnnotationKey: "kernel.msgmax,",
},
},
Spec: validPodSpec,
},
"invalid unsafe sysctl annotation": {
ObjectMeta: api.ObjectMeta{
Name: "123",
Namespace: "ns",
Annotations: map[string]string{
api.SysctlsPodAnnotationKey: "foo:",
},
},
Spec: validPodSpec,
},
"intersecting safe sysctls and unsafe sysctls annotations": {
ObjectMeta: api.ObjectMeta{
Name: "123",
Namespace: "ns",
Annotations: map[string]string{
api.SysctlsPodAnnotationKey: "kernel.shmmax=10000000",
api.UnsafeSysctlsPodAnnotationKey: "kernel.shmmax=10000000",
},
},
Spec: validPodSpec,
},
}
for k, v := range errorCases {
if errs := ValidatePod(&v); len(errs) == 0 {
@@ -7826,3 +7878,91 @@ func TestValidateHasLabel(t *testing.T) {
t.Errorf("expected failure")
}
}
func TestIsValidSysctlName(t *testing.T) {
valid := []string{
"a.b.c.d",
"a",
"a_b",
"a-b",
"abc",
"abc.def",
}
invalid := []string{
"",
"*",
"ä",
"a_",
"_",
"__",
"_a",
"_a._b",
"-",
".",
"a.",
".a",
"a.b.",
"a*.b",
"a*b",
"*a",
"a.*",
"*",
"abc*",
"a.abc*",
"a.b.*",
"Abc",
func(n int) string {
x := make([]byte, n)
for i := range x {
x[i] = byte('a')
}
return string(x)
}(256),
}
for _, s := range valid {
if !IsValidSysctlName(s) {
t.Errorf("%q expected to be a valid sysctl name", s)
}
}
for _, s := range invalid {
if IsValidSysctlName(s) {
t.Errorf("%q expected to be an invalid sysctl name", s)
}
}
}
func TestValidateSysctls(t *testing.T) {
valid := []string{
"net.foo.bar",
"kernel.shmmax",
}
invalid := []string{
"i..nvalid",
"_invalid",
}
sysctls := make([]api.Sysctl, len(valid))
for i, sysctl := range valid {
sysctls[i].Name = sysctl
}
errs := validateSysctls(sysctls, field.NewPath("foo"))
if len(errs) != 0 {
t.Errorf("unexpected validation errors: %v", errs)
}
sysctls = make([]api.Sysctl, len(invalid))
for i, sysctl := range invalid {
sysctls[i].Name = sysctl
}
errs = validateSysctls(sysctls, field.NewPath("foo"))
if len(errs) != 2 {
t.Errorf("expected 2 validation errors. Got: %v", errs)
} else {
if got, expected := errs[0].Error(), "foo"; !strings.Contains(got, expected) {
t.Errorf("unexpected errors: expected=%q, got=%q", expected, got)
}
if got, expected := errs[1].Error(), "foo"; !strings.Contains(got, expected) {
t.Errorf("unexpected errors: expected=%q, got=%q", expected, got)
}
}
}

View File

@@ -660,6 +660,19 @@ func (dm *DockerManager) runContainer(
SecurityOpt: securityOpts,
}
// Set sysctls if requested
sysctls, err := api.SysctlsFromPodAnnotation(pod.Annotations[api.SysctlsPodAnnotationKey])
if err != nil {
dm.recorder.Eventf(ref, api.EventTypeWarning, events.FailedToCreateContainer, "Failed to create docker container %q of pod %q with error: %v", container.Name, format.Pod(pod), err)
return kubecontainer.ContainerID{}, err
}
if len(sysctls) > 0 {
hc.Sysctls = make(map[string]string, len(sysctls))
for _, c := range sysctls {
hc.Sysctls[c.Name] = c.Value
}
}
// If current api version is newer than docker 1.10 requested, set OomScoreAdj to HostConfig
result, err := dm.checkDockerAPIVersion(dockerV110APIVersion)
if err != nil {