mirror of
https://github.com/optim-enterprises-bv/kubernetes.git
synced 2025-11-01 18:58:18 +00:00
Add sysctl api, validation & Docker support
This commit is contained in:
committed by
Dr. Stefan Schimanski
parent
c5e7e5124a
commit
bea189e9c9
@@ -438,6 +438,20 @@ const (
|
|||||||
// PreferAvoidPodsAnnotationKey represents the key of preferAvoidPods data (json serialized)
|
// PreferAvoidPodsAnnotationKey represents the key of preferAvoidPods data (json serialized)
|
||||||
// in the Annotations of a Node.
|
// in the Annotations of a Node.
|
||||||
PreferAvoidPodsAnnotationKey string = "scheduler.alpha.kubernetes.io/preferAvoidPods"
|
PreferAvoidPodsAnnotationKey string = "scheduler.alpha.kubernetes.io/preferAvoidPods"
|
||||||
|
|
||||||
|
// SysctlsPodAnnotationKey represents the key of sysctls which are set for the infrastructure
|
||||||
|
// container of a pod. The annotation value is a comma separated list of sysctl_name=value
|
||||||
|
// key-value pairs. Only a limited set of whitelisted and isolated sysctls is supported by
|
||||||
|
// the kubelet. Pods with other sysctls will fail to launch.
|
||||||
|
SysctlsPodAnnotationKey string = "security.alpha.kubernetes.io/sysctls"
|
||||||
|
|
||||||
|
// UnsafeSysctlsPodAnnotationKey represents the key of sysctls which are set for the infrastructure
|
||||||
|
// container of a pod. The annotation value is a comma separated list of sysctl_name=value
|
||||||
|
// key-value pairs. Unsafe sysctls must be explicitly enabled for a kubelet. They are properly
|
||||||
|
// namespaced to a pod or a container, but their isolation is usually unclear or weak. Their use
|
||||||
|
// is at-your-own-risk. Pods that attempt to set an unsafe sysctl that is not enabled for a kubelet
|
||||||
|
// will fail to launch.
|
||||||
|
UnsafeSysctlsPodAnnotationKey string = "security.alpha.kubernetes.io/unsafe-sysctls"
|
||||||
)
|
)
|
||||||
|
|
||||||
// GetAffinityFromPod gets the json serialized affinity data from Pod.Annotations
|
// GetAffinityFromPod gets the json serialized affinity data from Pod.Annotations
|
||||||
@@ -522,3 +536,51 @@ func GetAvoidPodsFromNodeAnnotations(annotations map[string]string) (AvoidPods,
|
|||||||
}
|
}
|
||||||
return avoidPods, nil
|
return avoidPods, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SysctlsFromPodAnnotations parses the sysctl annotations into a slice of safe Sysctls
|
||||||
|
// and a slice of unsafe Sysctls. This is only a convenience wrapper around
|
||||||
|
// SysctlsFromPodAnnotation.
|
||||||
|
func SysctlsFromPodAnnotations(a map[string]string) ([]Sysctl, []Sysctl, error) {
|
||||||
|
safe, err := SysctlsFromPodAnnotation(a[SysctlsPodAnnotationKey])
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
unsafe, err := SysctlsFromPodAnnotation(a[UnsafeSysctlsPodAnnotationKey])
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return safe, unsafe, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// SysctlsFromPodAnnotation parses an annotation value into a slice of Sysctls.
|
||||||
|
func SysctlsFromPodAnnotation(annotation string) ([]Sysctl, error) {
|
||||||
|
if len(annotation) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
kvs := strings.Split(annotation, ",")
|
||||||
|
sysctls := make([]Sysctl, len(kvs))
|
||||||
|
for i, kv := range kvs {
|
||||||
|
cs := strings.Split(kv, "=")
|
||||||
|
if len(cs) != 2 {
|
||||||
|
return nil, fmt.Errorf("sysctl %q not of the format sysctl_name=value", kv)
|
||||||
|
}
|
||||||
|
sysctls[i].Name = cs[0]
|
||||||
|
sysctls[i].Value = cs[1]
|
||||||
|
}
|
||||||
|
return sysctls, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// PodAnnotationsFromSysctls creates an annotation value for a slice of Sysctls.
|
||||||
|
func PodAnnotationsFromSysctls(sysctls []Sysctl) string {
|
||||||
|
if len(sysctls) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
kvs := make([]string, len(sysctls))
|
||||||
|
for i := range sysctls {
|
||||||
|
kvs[i] = fmt.Sprintf("%s=%s", sysctls[i].Name, sysctls[i].Value)
|
||||||
|
}
|
||||||
|
return strings.Join(kvs, ",")
|
||||||
|
}
|
||||||
|
|||||||
@@ -391,3 +391,42 @@ func TestGetAvoidPodsFromNode(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSysctlsFromPodAnnotation(t *testing.T) {
|
||||||
|
type Test struct {
|
||||||
|
annotation string
|
||||||
|
expectValue []Sysctl
|
||||||
|
expectErr bool
|
||||||
|
}
|
||||||
|
for i, test := range []Test{
|
||||||
|
{
|
||||||
|
annotation: "",
|
||||||
|
expectValue: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
annotation: "foo.bar",
|
||||||
|
expectErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
annotation: "foo.bar=42",
|
||||||
|
expectValue: []Sysctl{{Name: "foo.bar", Value: "42"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
annotation: "foo.bar=42,",
|
||||||
|
expectErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
annotation: "foo.bar=42,abc.def=1",
|
||||||
|
expectValue: []Sysctl{{Name: "foo.bar", Value: "42"}, {Name: "abc.def", Value: "1"}},
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
sysctls, err := SysctlsFromPodAnnotation(test.annotation)
|
||||||
|
if test.expectErr && err == nil {
|
||||||
|
t.Errorf("[%v]expected error but got none", i)
|
||||||
|
} else if !test.expectErr && err != nil {
|
||||||
|
t.Errorf("[%v]did not expect error but got: %v", i, err)
|
||||||
|
} else if !reflect.DeepEqual(sysctls, test.expectValue) {
|
||||||
|
t.Errorf("[%v]expect value %v but got %v", i, test.expectValue, sysctls)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -1564,6 +1564,14 @@ type PodSpec struct {
|
|||||||
Subdomain string `json:"subdomain,omitempty"`
|
Subdomain string `json:"subdomain,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sysctl is a single kernel parameter assignment: the parameter's name
// together with the value it is to be set to.
type Sysctl struct {
	// Name is the name of the kernel parameter to set.
	Name string `json:"name"`
	// Value is the value to set the kernel parameter to, kept as a string.
	Value string `json:"value"`
}
|
||||||
|
|
||||||
// PodSecurityContext holds pod-level security attributes and common container settings.
|
// PodSecurityContext holds pod-level security attributes and common container settings.
|
||||||
// Some fields are also present in container.securityContext. Field values of
|
// Some fields are also present in container.securityContext. Field values of
|
||||||
// container.securityContext take precedence over field values of PodSecurityContext.
|
// container.securityContext take precedence over field values of PodSecurityContext.
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
"reflect"
|
"reflect"
|
||||||
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/golang/glog"
|
"github.com/golang/glog"
|
||||||
@@ -132,6 +133,23 @@ func ValidatePodSpecificAnnotations(annotations map[string]string, spec *api.Pod
|
|||||||
allErrs = append(allErrs, ValidateSeccompPodAnnotations(annotations, fldPath)...)
|
allErrs = append(allErrs, ValidateSeccompPodAnnotations(annotations, fldPath)...)
|
||||||
allErrs = append(allErrs, ValidateAppArmorPodAnnotations(annotations, spec, fldPath)...)
|
allErrs = append(allErrs, ValidateAppArmorPodAnnotations(annotations, spec, fldPath)...)
|
||||||
|
|
||||||
|
sysctls, err := api.SysctlsFromPodAnnotation(annotations[api.SysctlsPodAnnotationKey])
|
||||||
|
if err != nil {
|
||||||
|
allErrs = append(allErrs, field.Invalid(fldPath.Key(api.SysctlsPodAnnotationKey), annotations[api.SysctlsPodAnnotationKey], err.Error()))
|
||||||
|
} else {
|
||||||
|
allErrs = append(allErrs, validateSysctls(sysctls, fldPath.Key(api.SysctlsPodAnnotationKey))...)
|
||||||
|
}
|
||||||
|
unsafeSysctls, err := api.SysctlsFromPodAnnotation(annotations[api.UnsafeSysctlsPodAnnotationKey])
|
||||||
|
if err != nil {
|
||||||
|
allErrs = append(allErrs, field.Invalid(fldPath.Key(api.UnsafeSysctlsPodAnnotationKey), annotations[api.UnsafeSysctlsPodAnnotationKey], err.Error()))
|
||||||
|
} else {
|
||||||
|
allErrs = append(allErrs, validateSysctls(unsafeSysctls, fldPath.Key(api.UnsafeSysctlsPodAnnotationKey))...)
|
||||||
|
}
|
||||||
|
inBoth := sysctlIntersection(sysctls, unsafeSysctls)
|
||||||
|
if len(inBoth) > 0 {
|
||||||
|
allErrs = append(allErrs, field.Invalid(fldPath.Key(api.UnsafeSysctlsPodAnnotationKey), strings.Join(inBoth, ", "), "can not be safe and unsafe"))
|
||||||
|
}
|
||||||
|
|
||||||
return allErrs
|
return allErrs
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2128,6 +2146,40 @@ func podSpecHasContainer(spec *api.PodSpec, containerName string) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const (
	// SysctlSegmentFmt is the regular expression for one dot-free segment of a
	// sysctl name: lower-case alphanumerics, with '-' and '_' allowed only
	// between the first and the last character.
	SysctlSegmentFmt string = "[a-z0-9]([-_a-z0-9]*[a-z0-9])?"

	// SysctlFmt is the regular expression for a complete sysctl name: one or
	// more segments joined by dots.
	SysctlFmt string = "(" + SysctlSegmentFmt + `\.)*` + SysctlSegmentFmt

	// SysctlMaxLength is the maximal length of a sysctl name.
	SysctlMaxLength int = 253
)

// sysctlRegexp is SysctlFmt anchored at both ends, compiled once at package
// initialization.
var sysctlRegexp = regexp.MustCompile("^" + SysctlFmt + "$")
|
||||||
|
|
||||||
|
// IsValidSysctlName checks that the given string is a valid sysctl name,
|
||||||
|
// i.e. matches SysctlFmt.
|
||||||
|
func IsValidSysctlName(name string) bool {
|
||||||
|
if len(name) > SysctlMaxLength {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return sysctlRegexp.MatchString(name)
|
||||||
|
}
|
||||||
|
|
||||||
|
func validateSysctls(sysctls []api.Sysctl, fldPath *field.Path) field.ErrorList {
|
||||||
|
allErrs := field.ErrorList{}
|
||||||
|
for i, s := range sysctls {
|
||||||
|
if len(s.Name) == 0 {
|
||||||
|
allErrs = append(allErrs, field.Required(fldPath.Index(i).Child("name"), ""))
|
||||||
|
} else if !IsValidSysctlName(s.Name) {
|
||||||
|
allErrs = append(allErrs, field.Invalid(fldPath.Index(i).Child("name"), s.Name, fmt.Sprintf("must have at most %d characters and match regex %s", SysctlMaxLength, SysctlFmt)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return allErrs
|
||||||
|
}
|
||||||
|
|
||||||
// ValidatePodSecurityContext test that the specified PodSecurityContext has valid data.
|
// ValidatePodSecurityContext test that the specified PodSecurityContext has valid data.
|
||||||
func ValidatePodSecurityContext(securityContext *api.PodSecurityContext, spec *api.PodSpec, specPath, fldPath *field.Path) field.ErrorList {
|
func ValidatePodSecurityContext(securityContext *api.PodSecurityContext, spec *api.PodSpec, specPath, fldPath *field.Path) field.ErrorList {
|
||||||
allErrs := field.ErrorList{}
|
allErrs := field.ErrorList{}
|
||||||
@@ -3475,3 +3527,17 @@ func isValidHostnamesMap(serializedPodHostNames string) bool {
|
|||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func sysctlIntersection(a []api.Sysctl, b []api.Sysctl) []string {
|
||||||
|
lookup := make(map[string]struct{}, len(a))
|
||||||
|
result := []string{}
|
||||||
|
for i := range a {
|
||||||
|
lookup[a[i].Name] = struct{}{}
|
||||||
|
}
|
||||||
|
for i := range b {
|
||||||
|
if _, found := lookup[b[i].Name]; found {
|
||||||
|
result = append(result, b[i].Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|||||||
@@ -3538,6 +3538,17 @@ func TestValidatePod(t *testing.T) {
|
|||||||
},
|
},
|
||||||
Spec: validPodSpec,
|
Spec: validPodSpec,
|
||||||
},
|
},
|
||||||
|
{ // syntactically valid sysctls
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "123",
|
||||||
|
Namespace: "ns",
|
||||||
|
Annotations: map[string]string{
|
||||||
|
api.SysctlsPodAnnotationKey: "kernel.shmmni=32768,kernel.shmmax=1000000000",
|
||||||
|
api.UnsafeSysctlsPodAnnotationKey: "knet.ipv4.route.min_pmtu=1000",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Spec: validPodSpec,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, pod := range successCases {
|
for _, pod := range successCases {
|
||||||
if errs := ValidatePod(&pod); len(errs) != 0 {
|
if errs := ValidatePod(&pod); len(errs) != 0 {
|
||||||
@@ -3987,6 +3998,47 @@ func TestValidatePod(t *testing.T) {
|
|||||||
},
|
},
|
||||||
Spec: validPodSpec,
|
Spec: validPodSpec,
|
||||||
},
|
},
|
||||||
|
"invalid sysctl annotation": {
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "123",
|
||||||
|
Namespace: "ns",
|
||||||
|
Annotations: map[string]string{
|
||||||
|
api.SysctlsPodAnnotationKey: "foo:",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Spec: validPodSpec,
|
||||||
|
},
|
||||||
|
"invalid comma-separated sysctl annotation": {
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "123",
|
||||||
|
Namespace: "ns",
|
||||||
|
Annotations: map[string]string{
|
||||||
|
api.SysctlsPodAnnotationKey: "kernel.msgmax,",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Spec: validPodSpec,
|
||||||
|
},
|
||||||
|
"invalid unsafe sysctl annotation": {
	ObjectMeta: api.ObjectMeta{
		Name:      "123",
		Namespace: "ns",
		Annotations: map[string]string{
			// Use the unsafe key here: with the safe key this case only
			// repeats "invalid sysctl annotation" and leaves the unsafe
			// annotation's parse-error path untested.
			api.UnsafeSysctlsPodAnnotationKey: "foo:",
		},
	},
	Spec: validPodSpec,
},
|
||||||
|
"intersecting safe sysctls and unsafe sysctls annotations": {
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "123",
|
||||||
|
Namespace: "ns",
|
||||||
|
Annotations: map[string]string{
|
||||||
|
api.SysctlsPodAnnotationKey: "kernel.shmmax=10000000",
|
||||||
|
api.UnsafeSysctlsPodAnnotationKey: "kernel.shmmax=10000000",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Spec: validPodSpec,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for k, v := range errorCases {
|
for k, v := range errorCases {
|
||||||
if errs := ValidatePod(&v); len(errs) == 0 {
|
if errs := ValidatePod(&v); len(errs) == 0 {
|
||||||
@@ -7826,3 +7878,91 @@ func TestValidateHasLabel(t *testing.T) {
|
|||||||
t.Errorf("expected failure")
|
t.Errorf("expected failure")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestIsValidSysctlName(t *testing.T) {
|
||||||
|
valid := []string{
|
||||||
|
"a.b.c.d",
|
||||||
|
"a",
|
||||||
|
"a_b",
|
||||||
|
"a-b",
|
||||||
|
"abc",
|
||||||
|
"abc.def",
|
||||||
|
}
|
||||||
|
invalid := []string{
|
||||||
|
"",
|
||||||
|
"*",
|
||||||
|
"ä",
|
||||||
|
"a_",
|
||||||
|
"_",
|
||||||
|
"__",
|
||||||
|
"_a",
|
||||||
|
"_a._b",
|
||||||
|
"-",
|
||||||
|
".",
|
||||||
|
"a.",
|
||||||
|
".a",
|
||||||
|
"a.b.",
|
||||||
|
"a*.b",
|
||||||
|
"a*b",
|
||||||
|
"*a",
|
||||||
|
"a.*",
|
||||||
|
"*",
|
||||||
|
"abc*",
|
||||||
|
"a.abc*",
|
||||||
|
"a.b.*",
|
||||||
|
"Abc",
|
||||||
|
func(n int) string {
|
||||||
|
x := make([]byte, n)
|
||||||
|
for i := range x {
|
||||||
|
x[i] = byte('a')
|
||||||
|
}
|
||||||
|
return string(x)
|
||||||
|
}(256),
|
||||||
|
}
|
||||||
|
for _, s := range valid {
|
||||||
|
if !IsValidSysctlName(s) {
|
||||||
|
t.Errorf("%q expected to be a valid sysctl name", s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, s := range invalid {
|
||||||
|
if IsValidSysctlName(s) {
|
||||||
|
t.Errorf("%q expected to be an invalid sysctl name", s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateSysctls(t *testing.T) {
|
||||||
|
valid := []string{
|
||||||
|
"net.foo.bar",
|
||||||
|
"kernel.shmmax",
|
||||||
|
}
|
||||||
|
invalid := []string{
|
||||||
|
"i..nvalid",
|
||||||
|
"_invalid",
|
||||||
|
}
|
||||||
|
|
||||||
|
sysctls := make([]api.Sysctl, len(valid))
|
||||||
|
for i, sysctl := range valid {
|
||||||
|
sysctls[i].Name = sysctl
|
||||||
|
}
|
||||||
|
errs := validateSysctls(sysctls, field.NewPath("foo"))
|
||||||
|
if len(errs) != 0 {
|
||||||
|
t.Errorf("unexpected validation errors: %v", errs)
|
||||||
|
}
|
||||||
|
|
||||||
|
sysctls = make([]api.Sysctl, len(invalid))
|
||||||
|
for i, sysctl := range invalid {
|
||||||
|
sysctls[i].Name = sysctl
|
||||||
|
}
|
||||||
|
errs = validateSysctls(sysctls, field.NewPath("foo"))
|
||||||
|
if len(errs) != 2 {
|
||||||
|
t.Errorf("expected 2 validation errors. Got: %v", errs)
|
||||||
|
} else {
|
||||||
|
if got, expected := errs[0].Error(), "foo"; !strings.Contains(got, expected) {
|
||||||
|
t.Errorf("unexpected errors: expected=%q, got=%q", expected, got)
|
||||||
|
}
|
||||||
|
if got, expected := errs[1].Error(), "foo"; !strings.Contains(got, expected) {
|
||||||
|
t.Errorf("unexpected errors: expected=%q, got=%q", expected, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -660,6 +660,19 @@ func (dm *DockerManager) runContainer(
|
|||||||
SecurityOpt: securityOpts,
|
SecurityOpt: securityOpts,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set sysctls if requested
|
||||||
|
sysctls, err := api.SysctlsFromPodAnnotation(pod.Annotations[api.SysctlsPodAnnotationKey])
|
||||||
|
if err != nil {
|
||||||
|
dm.recorder.Eventf(ref, api.EventTypeWarning, events.FailedToCreateContainer, "Failed to create docker container %q of pod %q with error: %v", container.Name, format.Pod(pod), err)
|
||||||
|
return kubecontainer.ContainerID{}, err
|
||||||
|
}
|
||||||
|
if len(sysctls) > 0 {
|
||||||
|
hc.Sysctls = make(map[string]string, len(sysctls))
|
||||||
|
for _, c := range sysctls {
|
||||||
|
hc.Sysctls[c.Name] = c.Value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// If current api version is newer than docker 1.10 requested, set OomScoreAdj to HostConfig
|
// If current api version is newer than docker 1.10 requested, set OomScoreAdj to HostConfig
|
||||||
result, err := dm.checkDockerAPIVersion(dockerV110APIVersion)
|
result, err := dm.checkDockerAPIVersion(dockerV110APIVersion)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
Reference in New Issue
Block a user