mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-03 19:58:17 +00:00 
			
		
		
		
	Add sysctl whitelist on the node
This commit is contained in:
		@@ -183,6 +183,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
 | 
			
		||||
	fs.BoolVar(&s.MakeIPTablesUtilChains, "make-iptables-util-chains", s.MakeIPTablesUtilChains, "If true, kubelet will ensure iptables utility rules are present on host.")
 | 
			
		||||
	fs.Int32Var(&s.IPTablesMasqueradeBit, "iptables-masquerade-bit", s.IPTablesMasqueradeBit, "The bit of the fwmark space to mark packets for SNAT. Must be within the range [0, 31]. Please match this parameter with corresponding parameter in kube-proxy.")
 | 
			
		||||
	fs.Int32Var(&s.IPTablesDropBit, "iptables-drop-bit", s.IPTablesDropBit, "The bit of the fwmark space to mark packets for dropping. Must be within the range [0, 31].")
 | 
			
		||||
	fs.StringSliceVar(&s.AllowedUnsafeSysctls, "experimental-allowed-unsafe-sysctls", s.AllowedUnsafeSysctls, "Comma-separated whitelist of unsafe sysctls or unsafe sysctl patterns (ending in *). Use these at your own risk.")
 | 
			
		||||
 | 
			
		||||
	// Flags intended for testing, not recommended used in production environments.
 | 
			
		||||
	fs.StringVar(&s.RemoteRuntimeEndpoint, "container-runtime-endpoint", s.RemoteRuntimeEndpoint, "The unix socket endpoint of remote runtime service. If not empty, this option will override --container-runtime. This is an experimental feature. Intended for testing only.")
 | 
			
		||||
 
 | 
			
		||||
@@ -290,6 +290,7 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) {
 | 
			
		||||
		StandaloneMode:                 (len(s.APIServerList) == 0),
 | 
			
		||||
		StreamingConnectionIdleTimeout: s.StreamingConnectionIdleTimeout.Duration,
 | 
			
		||||
		SyncFrequency:                  s.SyncFrequency.Duration,
 | 
			
		||||
		AllowedUnsafeSysctls:           s.AllowedUnsafeSysctls,
 | 
			
		||||
		SystemCgroups:                  s.SystemCgroups,
 | 
			
		||||
		TLSOptions:                     tlsOptions,
 | 
			
		||||
		Writer:                         writer,
 | 
			
		||||
@@ -1098,6 +1099,7 @@ type KubeletConfig struct {
 | 
			
		||||
	StandaloneMode                 bool
 | 
			
		||||
	StreamingConnectionIdleTimeout time.Duration
 | 
			
		||||
	SyncFrequency                  time.Duration
 | 
			
		||||
	AllowedUnsafeSysctls           []string
 | 
			
		||||
	SystemCgroups                  string
 | 
			
		||||
	TLSOptions                     *server.TLSOptions
 | 
			
		||||
	Writer                         io.Writer
 | 
			
		||||
@@ -1218,6 +1220,7 @@ func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod
 | 
			
		||||
		kc.MakeIPTablesUtilChains,
 | 
			
		||||
		kc.iptablesMasqueradeBit,
 | 
			
		||||
		kc.iptablesDropBit,
 | 
			
		||||
		kc.AllowedUnsafeSysctls,
 | 
			
		||||
	)
 | 
			
		||||
 | 
			
		||||
	if err != nil {
 | 
			
		||||
 
 | 
			
		||||
@@ -100,6 +100,7 @@ pkg/kubelet/api
 | 
			
		||||
pkg/kubelet/container
 | 
			
		||||
pkg/kubelet/envvars
 | 
			
		||||
pkg/kubelet/eviction
 | 
			
		||||
pkg/kubelet/sysctls
 | 
			
		||||
pkg/kubelet/util/format
 | 
			
		||||
pkg/kubelet/util/ioutils
 | 
			
		||||
pkg/kubelet/volume
 | 
			
		||||
@@ -139,6 +140,7 @@ pkg/runtime/serializer/yaml
 | 
			
		||||
pkg/security
 | 
			
		||||
pkg/security/podsecuritypolicy/apparmor
 | 
			
		||||
pkg/security/podsecuritypolicy/capabilities
 | 
			
		||||
pkg/security/podsecuritypolicy/sysctl
 | 
			
		||||
pkg/serviceaccount
 | 
			
		||||
pkg/storage
 | 
			
		||||
pkg/storage/etcd3
 | 
			
		||||
 
 | 
			
		||||
@@ -164,6 +164,7 @@ executor-logv
 | 
			
		||||
executor-path
 | 
			
		||||
executor-suicide-timeout
 | 
			
		||||
exit-on-lock-contention
 | 
			
		||||
experimental-allowed-unsafe-sysctls
 | 
			
		||||
experimental-bootstrap-kubeconfig
 | 
			
		||||
experimental-flannel-overlay
 | 
			
		||||
experimental-keystone-url
 | 
			
		||||
 
 | 
			
		||||
@@ -419,6 +419,8 @@ type KubeletConfiguration struct {
 | 
			
		||||
	// iptablesDropBit is the bit of the iptables fwmark space to use for dropping packets. Kubelet will ensure iptables mark and drop rules.
 | 
			
		||||
	// Values must be within the range [0, 31]. Must be different from IPTablesMasqueradeBit
 | 
			
		||||
	IPTablesDropBit int32 `json:"iptablesDropBit"`
 | 
			
		||||
	// Whitelist of unsafe sysctls or sysctl patterns (ending in *).
 | 
			
		||||
	AllowedUnsafeSysctls []string `json:"experimentalAllowedUnsafeSysctls,omitempty"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type KubeSchedulerConfiguration struct {
 | 
			
		||||
 
 | 
			
		||||
@@ -474,4 +474,7 @@ type KubeletConfiguration struct {
 | 
			
		||||
	// iptablesDropBit is the bit of the iptables fwmark space to mark for dropping packets.
 | 
			
		||||
	// Values must be within the range [0, 31]. Must be different from other mark bits.
 | 
			
		||||
	IPTablesDropBit *int32 `json:"iptablesDropBit"`
 | 
			
		||||
	// Whitelist of unsafe sysctls or sysctl patterns (ending in *). Use these at your own risk.
 | 
			
		||||
	// Resource isolation might be lacking and pod might influence each other on the same node.
 | 
			
		||||
	AllowedUnsafeSysctls []string `json:"allowedUnsafeSysctls,omitempty"`
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -76,9 +76,10 @@ const (
 | 
			
		||||
	// docker version should be at least 1.9.x
 | 
			
		||||
	minimumDockerAPIVersion = "1.21"
 | 
			
		||||
 | 
			
		||||
	// Remote API version for docker daemon version v1.10
 | 
			
		||||
	// Remote API version for docker daemon versions
 | 
			
		||||
	// https://docs.docker.com/engine/reference/api/docker_remote_api/
 | 
			
		||||
	dockerV110APIVersion = "1.22"
 | 
			
		||||
	DockerV112APIVersion = "1.24"
 | 
			
		||||
 | 
			
		||||
	// ndots specifies the minimum number of dots that a domain name must contain for the resolver to consider it as FQDN (fully-qualified)
 | 
			
		||||
	// we want to able to consider SRV lookup names like _dns._udp.kube-dns.default.svc to be considered relative.
 | 
			
		||||
@@ -661,16 +662,19 @@ func (dm *DockerManager) runContainer(
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Set sysctls if requested
 | 
			
		||||
	sysctls, err := api.SysctlsFromPodAnnotation(pod.Annotations[api.SysctlsPodAnnotationKey])
 | 
			
		||||
	sysctls, unsafeSysctls, err := api.SysctlsFromPodAnnotations(pod.Annotations)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		dm.recorder.Eventf(ref, api.EventTypeWarning, events.FailedToCreateContainer, "Failed to create docker container %q of pod %q with error: %v", container.Name, format.Pod(pod), err)
 | 
			
		||||
		return kubecontainer.ContainerID{}, err
 | 
			
		||||
	}
 | 
			
		||||
	if len(sysctls) > 0 {
 | 
			
		||||
		hc.Sysctls = make(map[string]string, len(sysctls))
 | 
			
		||||
	if len(sysctls)+len(unsafeSysctls) > 0 {
 | 
			
		||||
		hc.Sysctls = make(map[string]string, len(sysctls)+len(unsafeSysctls))
 | 
			
		||||
		for _, c := range sysctls {
 | 
			
		||||
			hc.Sysctls[c.Name] = c.Value
 | 
			
		||||
		}
 | 
			
		||||
		for _, c := range unsafeSysctls {
 | 
			
		||||
			hc.Sysctls[c.Name] = c.Value
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// If current api version is newer than docker 1.10 requested, set OomScoreAdj to HostConfig
 | 
			
		||||
 
 | 
			
		||||
@@ -67,6 +67,7 @@ import (
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/server"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/server/stats"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/status"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/sysctl"
 | 
			
		||||
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/util/format"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/util/ioutils"
 | 
			
		||||
@@ -249,6 +250,7 @@ func NewMainKubelet(
 | 
			
		||||
	makeIPTablesUtilChains bool,
 | 
			
		||||
	iptablesMasqueradeBit int,
 | 
			
		||||
	iptablesDropBit int,
 | 
			
		||||
	allowedUnsafeSysctls []string,
 | 
			
		||||
) (*Kubelet, error) {
 | 
			
		||||
	if rootDirectory == "" {
 | 
			
		||||
		return nil, fmt.Errorf("invalid root directory %q", rootDirectory)
 | 
			
		||||
@@ -591,6 +593,26 @@ func NewMainKubelet(
 | 
			
		||||
	klet.evictionManager = evictionManager
 | 
			
		||||
	klet.AddPodAdmitHandler(evictionAdmitHandler)
 | 
			
		||||
 | 
			
		||||
	// add sysctl admission
 | 
			
		||||
	runtimeSupport, err := sysctl.NewRuntimeAdmitHandler(klet.containerRuntime)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
	safeWhitelist, err := sysctl.NewWhitelist(sysctl.SafeSysctlWhitelist(), api.SysctlsPodAnnotationKey)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
	// Safe, whitelisted sysctls can always be used as unsafe sysctls in the spec
 | 
			
		||||
	// Hence, we concatenate those two lists.
 | 
			
		||||
	safeAndUnsafeSysctls := append(sysctl.SafeSysctlWhitelist(), allowedUnsafeSysctls...)
 | 
			
		||||
	unsafeWhitelist, err := sysctl.NewWhitelist(safeAndUnsafeSysctls, api.UnsafeSysctlsPodAnnotationKey)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
	klet.AddPodAdmitHandler(runtimeSupport)
 | 
			
		||||
	klet.AddPodAdmitHandler(safeWhitelist)
 | 
			
		||||
	klet.AddPodAdmitHandler(unsafeWhitelist)
 | 
			
		||||
 | 
			
		||||
	// enable active deadline handler
 | 
			
		||||
	activeDeadlineHandler, err := newActiveDeadlineHandler(klet.statusManager, klet.recorder, klet.clock)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										60
									
								
								pkg/kubelet/sysctl/namespace.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								pkg/kubelet/sysctl/namespace.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,60 @@
 | 
			
		||||
/*
 | 
			
		||||
Copyright 2016 The Kubernetes Authors.
 | 
			
		||||
 | 
			
		||||
Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
you may not use this file except in compliance with the License.
 | 
			
		||||
You may obtain a copy of the License at
 | 
			
		||||
 | 
			
		||||
    http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
 | 
			
		||||
Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
See the License for the specific language governing permissions and
 | 
			
		||||
limitations under the License.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
package sysctl
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"strings"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// Namespace is the name of a Linux kernel namespace that a sysctl can be
// scoped to.
type Namespace string

const (
	// IpcNamespace is the Linux IPC namespace.
	IpcNamespace Namespace = "ipc"

	// NetNamespace is the Linux network namespace.
	NetNamespace Namespace = "net"

	// UnknownNamespace is the zero value, returned when no namespace is known
	// for a sysctl.
	UnknownNamespace Namespace = ""
)

var (
	// namespaces maps complete sysctl names to the namespace they belong to.
	namespaces = map[string]Namespace{
		"kernel.sem": IpcNamespace,
	}

	// prefixNamespaces maps sysctl name prefixes to the namespace of every
	// sysctl starting with that prefix.
	prefixNamespaces = map[string]Namespace{
		"kernel.shm": IpcNamespace,
		"kernel.msg": IpcNamespace,
		"fs.mqueue.": IpcNamespace,
		"net.":       NetNamespace,
	}
)

// NamespacedBy reports which Linux kernel namespace scopes the sysctl val.
// An exact entry in namespaces takes precedence; otherwise the first matching
// entry of prefixNamespaces is used. UnknownNamespace is returned when val is
// not known to be namespaced.
func NamespacedBy(val string) Namespace {
	if exact, ok := namespaces[val]; ok {
		return exact
	}
	for prefix, scoped := range prefixNamespaces {
		if !strings.HasPrefix(val, prefix) {
			continue
		}
		return scoped
	}
	return UnknownNamespace
}
 | 
			
		||||
							
								
								
									
										36
									
								
								pkg/kubelet/sysctl/namespace_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								pkg/kubelet/sysctl/namespace_test.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,36 @@
 | 
			
		||||
/*
 | 
			
		||||
Copyright 2016 The Kubernetes Authors.
 | 
			
		||||
 | 
			
		||||
Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
you may not use this file except in compliance with the License.
 | 
			
		||||
You may obtain a copy of the License at
 | 
			
		||||
 | 
			
		||||
    http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
 | 
			
		||||
Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
See the License for the specific language governing permissions and
 | 
			
		||||
limitations under the License.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
package sysctl
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"testing"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func TestNamespacedBy(t *testing.T) {
 | 
			
		||||
	tests := map[string]Namespace{
 | 
			
		||||
		"kernel.shm_rmid_forced": IpcNamespace,
 | 
			
		||||
		"net.a.b.c":              NetNamespace,
 | 
			
		||||
		"fs.mqueue.a.b.c":        IpcNamespace,
 | 
			
		||||
		"foo":                    UnknownNamespace,
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for sysctl, ns := range tests {
 | 
			
		||||
		if got := NamespacedBy(sysctl); got != ns {
 | 
			
		||||
			t.Errorf("wrong namespace for %q: got=%s want=%s", sysctl, got, ns)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										96
									
								
								pkg/kubelet/sysctl/runtime.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										96
									
								
								pkg/kubelet/sysctl/runtime.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,96 @@
 | 
			
		||||
/*
 | 
			
		||||
Copyright 2016 The Kubernetes Authors.
 | 
			
		||||
 | 
			
		||||
Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
you may not use this file except in compliance with the License.
 | 
			
		||||
You may obtain a copy of the License at
 | 
			
		||||
 | 
			
		||||
    http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
 | 
			
		||||
Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
See the License for the specific language governing permissions and
 | 
			
		||||
limitations under the License.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
package sysctl
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"fmt"
 | 
			
		||||
 | 
			
		||||
	"k8s.io/kubernetes/pkg/api"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/container"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/dockertools"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	UnsupportedReason = "SysctlUnsupported"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type runtimeAdmitHandler struct {
 | 
			
		||||
	result lifecycle.PodAdmitResult
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
var _ lifecycle.PodAdmitHandler = &runtimeAdmitHandler{}
 | 
			
		||||
 | 
			
		||||
// NewRuntimeAdmitHandler returns a sysctlRuntimeAdmitHandler which checks whether
 | 
			
		||||
// the given runtime support sysctls.
 | 
			
		||||
func NewRuntimeAdmitHandler(runtime container.Runtime) (*runtimeAdmitHandler, error) {
 | 
			
		||||
	if runtime.Type() == dockertools.DockerType {
 | 
			
		||||
		v, err := runtime.APIVersion()
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return nil, fmt.Errorf("failed to get runtime version: %v", err)
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// only Docker >= 1.12 supports sysctls
 | 
			
		||||
		c, err := v.Compare(dockertools.DockerV112APIVersion)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return nil, fmt.Errorf("failed to compare Docker version for sysctl support: %v", err)
 | 
			
		||||
		}
 | 
			
		||||
		if c >= 0 {
 | 
			
		||||
			return &runtimeAdmitHandler{
 | 
			
		||||
				result: lifecycle.PodAdmitResult{
 | 
			
		||||
					Admit: true,
 | 
			
		||||
				},
 | 
			
		||||
			}, nil
 | 
			
		||||
		}
 | 
			
		||||
		return &runtimeAdmitHandler{
 | 
			
		||||
			result: lifecycle.PodAdmitResult{
 | 
			
		||||
				Admit:   false,
 | 
			
		||||
				Reason:  UnsupportedReason,
 | 
			
		||||
				Message: "Docker before 1.12 does not support sysctls",
 | 
			
		||||
			},
 | 
			
		||||
		}, nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// for other runtimes like rkt sysctls are not supported
 | 
			
		||||
	return &runtimeAdmitHandler{
 | 
			
		||||
		result: lifecycle.PodAdmitResult{
 | 
			
		||||
			Admit:   false,
 | 
			
		||||
			Reason:  UnsupportedReason,
 | 
			
		||||
			Message: fmt.Sprintf("runtime %v does not support sysctls", runtime.Type()),
 | 
			
		||||
		},
 | 
			
		||||
	}, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Admit checks whether the runtime supports sysctls.
 | 
			
		||||
func (w *runtimeAdmitHandler) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
 | 
			
		||||
	sysctls, unsafeSysctls, err := api.SysctlsFromPodAnnotations(attrs.Pod.Annotations)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return lifecycle.PodAdmitResult{
 | 
			
		||||
			Admit:   false,
 | 
			
		||||
			Reason:  AnnotationInvalidReason,
 | 
			
		||||
			Message: fmt.Sprintf("invalid sysctl annotation: %v", err),
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if len(sysctls)+len(unsafeSysctls) > 0 {
 | 
			
		||||
		return w.result
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return lifecycle.PodAdmitResult{
 | 
			
		||||
		Admit: true,
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										171
									
								
								pkg/kubelet/sysctl/whitelist.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										171
									
								
								pkg/kubelet/sysctl/whitelist.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,171 @@
 | 
			
		||||
/*
 | 
			
		||||
Copyright 2016 The Kubernetes Authors.
 | 
			
		||||
 | 
			
		||||
Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
you may not use this file except in compliance with the License.
 | 
			
		||||
You may obtain a copy of the License at
 | 
			
		||||
 | 
			
		||||
    http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
 | 
			
		||||
Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
See the License for the specific language governing permissions and
 | 
			
		||||
limitations under the License.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
package sysctl
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"strings"
 | 
			
		||||
 | 
			
		||||
	"k8s.io/kubernetes/pkg/api"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/api/validation"
 | 
			
		||||
	extvalidation "k8s.io/kubernetes/pkg/apis/extensions/validation"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// AnnotationInvalidReason is the rejection reason set when a pod's sysctl
// annotation cannot be parsed.
const AnnotationInvalidReason = "InvalidSysctlAnnotation"

// ForbiddenReason is the rejection reason set when a pod requests a sysctl
// that the whitelist does not allow.
const ForbiddenReason = "SysctlForbidden"
 | 
			
		||||
 | 
			
		||||
// SafeSysctlWhitelist returns a freshly allocated list of the sysctls and
// sysctl patterns (ending in *) that are considered safe.
//
// A sysctl is called safe iff
// - it is namespaced in the container or the pod
// - it is isolated, i.e. has no influence on any other pod on the same node.
func SafeSysctlWhitelist() []string {
	// Built on every call so callers may append to or modify the result.
	whitelist := []string{
		"kernel.shm_rmid_forced",
		"net.ipv4.ip_local_port_range",
		"net.ipv4.tcp_max_syn_backlog",
		"net.ipv4.tcp_syncookies",
	}
	return whitelist
}
 | 
			
		||||
 | 
			
		||||
// Whitelist provides a list of allowed sysctls and sysctl patterns (ending in *)
 | 
			
		||||
// and a function to check whether a given sysctl matches this list.
 | 
			
		||||
type Whitelist interface {
 | 
			
		||||
	// Validate checks that all sysctls given in a api.SysctlsPodAnnotationKey annotation
 | 
			
		||||
	// are valid according to the whitelist.
 | 
			
		||||
	Validate(pod *api.Pod) error
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// patternWhitelist takes a list of sysctls or sysctl patterns (ending in *) and
 | 
			
		||||
// checks validity via a sysctl and prefix map, rejecting those which are not known
 | 
			
		||||
// to be namespaced.
 | 
			
		||||
type patternWhitelist struct {
 | 
			
		||||
	sysctls       map[string]Namespace
 | 
			
		||||
	prefixes      map[string]Namespace
 | 
			
		||||
	annotationKey string
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
var _ lifecycle.PodAdmitHandler = &patternWhitelist{}
 | 
			
		||||
 | 
			
		||||
// NewWhitelist creates a new Whitelist from a list of sysctls and sysctl pattern (ending in *).
 | 
			
		||||
func NewWhitelist(patterns []string, annotationKey string) (*patternWhitelist, error) {
 | 
			
		||||
	w := &patternWhitelist{
 | 
			
		||||
		sysctls:       map[string]Namespace{},
 | 
			
		||||
		prefixes:      map[string]Namespace{},
 | 
			
		||||
		annotationKey: annotationKey,
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for _, s := range patterns {
 | 
			
		||||
		if !extvalidation.IsValidSysctlPattern(s) {
 | 
			
		||||
			return nil, fmt.Errorf("sysctl %q must have at most %d characters and match regex %s",
 | 
			
		||||
				s,
 | 
			
		||||
				validation.SysctlMaxLength,
 | 
			
		||||
				extvalidation.SysctlPatternFmt,
 | 
			
		||||
			)
 | 
			
		||||
		}
 | 
			
		||||
		if strings.HasSuffix(s, "*") {
 | 
			
		||||
			prefix := s[:len(s)-1]
 | 
			
		||||
			ns := NamespacedBy(prefix)
 | 
			
		||||
			if ns == UnknownNamespace {
 | 
			
		||||
				return nil, fmt.Errorf("the sysctls %q are not known to be namespaced", s)
 | 
			
		||||
			}
 | 
			
		||||
			w.prefixes[prefix] = ns
 | 
			
		||||
		} else {
 | 
			
		||||
			ns := NamespacedBy(s)
 | 
			
		||||
			if ns == UnknownNamespace {
 | 
			
		||||
				return nil, fmt.Errorf("the sysctl %q are not known to be namespaced", s)
 | 
			
		||||
			}
 | 
			
		||||
			w.sysctls[s] = ns
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return w, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// validateSysctl checks that a sysctl is whitelisted because it is known
 | 
			
		||||
// to be namespaced by the Linux kernel. Note that being whitelisted is required, but not
 | 
			
		||||
// sufficient: the container runtime might have a stricter check and refuse to launch a pod.
 | 
			
		||||
//
 | 
			
		||||
// The parameters hostNet and hostIPC are used to forbid sysctls for pod sharing the
 | 
			
		||||
// respective namespaces with the host. This check is only possible for sysctls on
 | 
			
		||||
// the static default whitelist, not those on the custom whitelist provided by the admin.
 | 
			
		||||
func (w *patternWhitelist) validateSysctl(sysctl string, hostNet, hostIPC bool) error {
 | 
			
		||||
	nsErrorFmt := "%q not allowed with host %s enabled"
 | 
			
		||||
	if ns, found := w.sysctls[sysctl]; found {
 | 
			
		||||
		if ns == IpcNamespace && hostIPC {
 | 
			
		||||
			return fmt.Errorf(nsErrorFmt, sysctl, ns)
 | 
			
		||||
		}
 | 
			
		||||
		if ns == NetNamespace && hostNet {
 | 
			
		||||
			return fmt.Errorf(nsErrorFmt, sysctl, ns)
 | 
			
		||||
		}
 | 
			
		||||
		return nil
 | 
			
		||||
	}
 | 
			
		||||
	for p, ns := range w.prefixes {
 | 
			
		||||
		if strings.HasPrefix(sysctl, p) {
 | 
			
		||||
			if ns == IpcNamespace && hostIPC {
 | 
			
		||||
				return fmt.Errorf(nsErrorFmt, sysctl, ns)
 | 
			
		||||
			}
 | 
			
		||||
			if ns == NetNamespace && hostNet {
 | 
			
		||||
				return fmt.Errorf(nsErrorFmt, sysctl, ns)
 | 
			
		||||
			}
 | 
			
		||||
			return nil
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return fmt.Errorf("%q not whitelisted", sysctl)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Admit checks that all sysctls given in a api.SysctlsPodAnnotationKey annotation
 | 
			
		||||
// are valid according to the whitelist.
 | 
			
		||||
func (w *patternWhitelist) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
 | 
			
		||||
	pod := attrs.Pod
 | 
			
		||||
	a := pod.Annotations[w.annotationKey]
 | 
			
		||||
	if a == "" {
 | 
			
		||||
		return lifecycle.PodAdmitResult{
 | 
			
		||||
			Admit: true,
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	sysctls, err := api.SysctlsFromPodAnnotation(a)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return lifecycle.PodAdmitResult{
 | 
			
		||||
			Admit:   false,
 | 
			
		||||
			Reason:  AnnotationInvalidReason,
 | 
			
		||||
			Message: fmt.Sprintf("invalid %s annotation: %v", w.annotationKey, err),
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	var hostNet, hostIPC bool
 | 
			
		||||
	if pod.Spec.SecurityContext != nil {
 | 
			
		||||
		hostNet = pod.Spec.SecurityContext.HostNetwork
 | 
			
		||||
		hostIPC = pod.Spec.SecurityContext.HostIPC
 | 
			
		||||
	}
 | 
			
		||||
	for _, s := range sysctls {
 | 
			
		||||
		if err := w.validateSysctl(s.Name, hostNet, hostIPC); err != nil {
 | 
			
		||||
			return lifecycle.PodAdmitResult{
 | 
			
		||||
				Admit:   false,
 | 
			
		||||
				Reason:  ForbiddenReason,
 | 
			
		||||
				Message: fmt.Sprintf("forbidden sysctl: %v", err),
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return lifecycle.PodAdmitResult{
 | 
			
		||||
		Admit: true,
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										84
									
								
								pkg/kubelet/sysctl/whitelist_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								pkg/kubelet/sysctl/whitelist_test.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,84 @@
 | 
			
		||||
/*
 | 
			
		||||
Copyright 2016 The Kubernetes Authors.
 | 
			
		||||
 | 
			
		||||
Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
you may not use this file except in compliance with the License.
 | 
			
		||||
You may obtain a copy of the License at
 | 
			
		||||
 | 
			
		||||
    http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
 | 
			
		||||
Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
See the License for the specific language governing permissions and
 | 
			
		||||
limitations under the License.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
package sysctl
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"testing"
 | 
			
		||||
 | 
			
		||||
	"k8s.io/kubernetes/pkg/api"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func TestNewWhitelist(t *testing.T) {
 | 
			
		||||
	type Test struct {
 | 
			
		||||
		sysctls []string
 | 
			
		||||
		err     bool
 | 
			
		||||
	}
 | 
			
		||||
	for _, test := range []Test{
 | 
			
		||||
		{sysctls: []string{"kernel.msg*", "kernel.sem"}},
 | 
			
		||||
		{sysctls: []string{" kernel.msg*"}, err: true},
 | 
			
		||||
		{sysctls: []string{"kernel.msg* "}, err: true},
 | 
			
		||||
		{sysctls: []string{"net.-"}, err: true},
 | 
			
		||||
		{sysctls: []string{"net.*.foo"}, err: true},
 | 
			
		||||
		{sysctls: []string{"foo"}, err: true},
 | 
			
		||||
	} {
 | 
			
		||||
		_, err := NewWhitelist(append(SafeSysctlWhitelist(), test.sysctls...), api.SysctlsPodAnnotationKey)
 | 
			
		||||
		if test.err && err == nil {
 | 
			
		||||
			t.Errorf("expected an error creating a whitelist for %v", test.sysctls)
 | 
			
		||||
		} else if !test.err && err != nil {
 | 
			
		||||
			t.Errorf("got unexpected error creating a whitelist for %v: %v", test.sysctls, err)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestWhitelist(t *testing.T) {
 | 
			
		||||
	type Test struct {
 | 
			
		||||
		sysctl           string
 | 
			
		||||
		hostNet, hostIPC bool
 | 
			
		||||
	}
 | 
			
		||||
	valid := []Test{
 | 
			
		||||
		{sysctl: "kernel.shm_rmid_forced"},
 | 
			
		||||
		{sysctl: "net.ipv4.ip_local_port_range"},
 | 
			
		||||
		{sysctl: "kernel.msgmax"},
 | 
			
		||||
		{sysctl: "kernel.sem"},
 | 
			
		||||
	}
 | 
			
		||||
	invalid := []Test{
 | 
			
		||||
		{sysctl: "kernel.shm_rmid_forced", hostIPC: true},
 | 
			
		||||
		{sysctl: "net.ipv4.ip_local_port_range", hostNet: true},
 | 
			
		||||
		{sysctl: "foo"},
 | 
			
		||||
		{sysctl: "net.a.b.c", hostNet: false},
 | 
			
		||||
		{sysctl: "net.ipv4.ip_local_port_range.a.b.c", hostNet: false},
 | 
			
		||||
		{sysctl: "kernel.msgmax", hostIPC: true},
 | 
			
		||||
		{sysctl: "kernel.sem", hostIPC: true},
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	w, err := NewWhitelist(append(SafeSysctlWhitelist(), "kernel.msg*", "kernel.sem"), api.SysctlsPodAnnotationKey)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		t.Fatalf("failed to create whitelist: %v", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for _, test := range valid {
 | 
			
		||||
		if err := w.validateSysctl(test.sysctl, test.hostNet, test.hostIPC); err != nil {
 | 
			
		||||
			t.Errorf("expected to be whitelisted: %+v, got: %v", test, err)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for _, test := range invalid {
 | 
			
		||||
		if err := w.validateSysctl(test.sysctl, test.hostNet, test.hostIPC); err == nil {
 | 
			
		||||
			t.Errorf("expected to be rejected: %+v", test)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user