mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-10-31 02:08:13 +00:00 
			
		
		
		
	Implement shared PID namespace in the dockershim
This commit is contained in:
		| @@ -309,6 +309,7 @@ func (c *kubeletConfiguration) addFlags(fs *pflag.FlagSet) { | ||||
|  | ||||
| 	fs.StringVar(&c.RemoteRuntimeEndpoint, "container-runtime-endpoint", c.RemoteRuntimeEndpoint, "[Experimental] The unix socket endpoint of remote runtime service. The endpoint is used only when CRI integration is enabled (--enable-cri)") | ||||
| 	fs.StringVar(&c.RemoteImageEndpoint, "image-service-endpoint", c.RemoteImageEndpoint, "[Experimental] The unix socket endpoint of remote image service. If not specified, it will be the same with container-runtime-endpoint by default. The endpoint is used only when CRI integration is enabled (--enable-cri)") | ||||
| 	fs.BoolVar(&c.DockerEnableSharedPID, "experimental-docker-enable-shared-pid", c.DockerEnableSharedPID, "[Experimental] The Container Runtime Interface (CRI) will eventually default to using a shared PID namespace for containers in a pod. Setting this flag allows previewing this behavior when running with the CRI enabled and Docker version 1.13.1 or higher.") | ||||
|  | ||||
| 	fs.BoolVar(&c.ExperimentalCheckNodeCapabilitiesBeforeMount, "experimental-check-node-capabilities-before-mount", c.ExperimentalCheckNodeCapabilitiesBeforeMount, "[Experimental] if set true, the kubelet will check the underlying node for required componenets (binaries, etc.) before performing the mount") | ||||
|  | ||||
|   | ||||
| @@ -976,7 +976,8 @@ func RunDockershim(c *componentconfig.KubeletConfiguration, dockershimRootDir st | ||||
| 	} | ||||
|  | ||||
| 	ds, err := dockershim.NewDockerService(dockerClient, c.SeccompProfileRoot, c.PodInfraContainerImage, | ||||
| 		streamingConfig, &pluginSettings, c.RuntimeCgroups, c.CgroupDriver, dockerExecHandler, dockershimRootDir) | ||||
| 		streamingConfig, &pluginSettings, c.RuntimeCgroups, c.CgroupDriver, dockerExecHandler, dockershimRootDir, | ||||
| 		!c.DockerEnableSharedPID) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|   | ||||
| @@ -244,6 +244,7 @@ experimental-check-node-capabilities-before-mount | ||||
| experimental-cri | ||||
| experimental-dockershim | ||||
| experimental-dockershim-root-directory | ||||
| experimental-docker-enable-shared-pid | ||||
| experimental-fail-swap-on | ||||
| experimental-kernel-memcg-notification | ||||
| experimental-keystone-ca-file | ||||
|   | ||||
| @@ -481,6 +481,11 @@ type KubeletConfiguration struct { | ||||
| 	// This flag, if set, instructs the kubelet to keep volumes from terminated pods mounted to the node. | ||||
| 	// This can be useful for debugging volume related issues. | ||||
| 	KeepTerminatedPodVolumes bool | ||||
| 	// This flag, if set, enables use of a shared PID namespace for pods running in the docker CRI runtime. | ||||
| 	// A shared PID namespace is the only option in non-docker runtimes and is required by the CRI. The ability to | ||||
| 	// disable it for docker will be removed unless a compelling use case is discovered with widespread use. | ||||
| 	// TODO: Remove once we no longer support disabling shared PID namespace (https://issues.k8s.io/41938) | ||||
| 	DockerEnableSharedPID bool | ||||
|  | ||||
| 	/* following flags are meant for Node Allocatable */ | ||||
|  | ||||
|   | ||||
| @@ -536,6 +536,8 @@ type KubeletConfiguration struct { | ||||
| 	// This flag, if set, instructs the kubelet to keep volumes from terminated pods mounted to the node. | ||||
| 	// This can be useful for debugging volume related issues. | ||||
| 	KeepTerminatedPodVolumes bool `json:"keepTerminatedPodVolumes,omitempty"` | ||||
| 	// This flag, if set, enables use of a shared PID namespace for pods run by the docker CRI runtime. | ||||
| 	DockerEnableSharedPID *bool `json:"dockerEnableSharedPID,omitempty"` | ||||
|  | ||||
| 	/* following flags are meant for Node Allocatable */ | ||||
|  | ||||
|   | ||||
| @@ -436,6 +436,9 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu | ||||
| 	out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn | ||||
| 	out.ExperimentalCheckNodeCapabilitiesBeforeMount = in.ExperimentalCheckNodeCapabilitiesBeforeMount | ||||
| 	out.KeepTerminatedPodVolumes = in.KeepTerminatedPodVolumes | ||||
| 	if err := v1.Convert_Pointer_bool_To_bool(&in.DockerEnableSharedPID, &out.DockerEnableSharedPID, s); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	out.SystemReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.SystemReserved)) | ||||
| 	out.KubeReserved = *(*componentconfig.ConfigurationMap)(unsafe.Pointer(&in.KubeReserved)) | ||||
| 	out.SystemReservedCgroup = in.SystemReservedCgroup | ||||
| @@ -637,6 +640,9 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu | ||||
| 	out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn | ||||
| 	out.ExperimentalCheckNodeCapabilitiesBeforeMount = in.ExperimentalCheckNodeCapabilitiesBeforeMount | ||||
| 	out.KeepTerminatedPodVolumes = in.KeepTerminatedPodVolumes | ||||
| 	if err := v1.Convert_bool_To_Pointer_bool(&in.DockerEnableSharedPID, &out.DockerEnableSharedPID, s); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	out.SystemReserved = *(*map[string]string)(unsafe.Pointer(&in.SystemReserved)) | ||||
| 	out.KubeReserved = *(*map[string]string)(unsafe.Pointer(&in.KubeReserved)) | ||||
| 	out.SystemReservedCgroup = in.SystemReservedCgroup | ||||
|   | ||||
| @@ -303,6 +303,11 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c * | ||||
| 			*out = new(bool) | ||||
| 			**out = **in | ||||
| 		} | ||||
| 		if in.DockerEnableSharedPID != nil { | ||||
| 			in, out := &in.DockerEnableSharedPID, &out.DockerEnableSharedPID | ||||
| 			*out = new(bool) | ||||
| 			**out = **in | ||||
| 		} | ||||
| 		if in.SystemReserved != nil { | ||||
| 			in, out := &in.SystemReserved, &out.SystemReserved | ||||
| 			*out = make(map[string]string) | ||||
|   | ||||
| @@ -163,6 +163,7 @@ func (ds *dockerService) CreateContainer(podSandboxID string, config *runtimeapi | ||||
|  | ||||
| 		// Apply security context. | ||||
| 		applyContainerSecurityContext(lc, podSandboxID, createConfig.Config, hc, securityOptSep) | ||||
| 		modifyPIDNamespaceOverrides(ds.disableSharedPID, apiVersion, hc) | ||||
| 	} | ||||
|  | ||||
| 	// Apply cgroupsParent derived from the sandbox config. | ||||
|   | ||||
| @@ -147,7 +147,7 @@ var internalLabelKeys []string = []string{containerTypeLabelKey, containerLogPat | ||||
|  | ||||
| // NOTE: Anything passed to DockerService should be eventually handled in another way when we switch to running the shim as a different process. | ||||
| func NewDockerService(client dockertools.DockerInterface, seccompProfileRoot string, podSandboxImage string, streamingConfig *streaming.Config, | ||||
| 	pluginSettings *NetworkPluginSettings, cgroupsName string, kubeCgroupDriver string, execHandler dockertools.ExecHandler, dockershimRootDir string) (DockerService, error) { | ||||
| 	pluginSettings *NetworkPluginSettings, cgroupsName string, kubeCgroupDriver string, execHandler dockertools.ExecHandler, dockershimRootDir string, disableSharedPID bool) (DockerService, error) { | ||||
| 	c := dockertools.NewInstrumentedDockerInterface(client) | ||||
| 	checkpointHandler, err := NewPersistentCheckpointHandler(dockershimRootDir) | ||||
| 	if err != nil { | ||||
| @@ -164,6 +164,7 @@ func NewDockerService(client dockertools.DockerInterface, seccompProfileRoot str | ||||
| 		}, | ||||
| 		containerManager:  cm.NewContainerManager(cgroupsName, client), | ||||
| 		checkpointHandler: checkpointHandler, | ||||
| 		disableSharedPID:  disableSharedPID, | ||||
| 	} | ||||
|  | ||||
| 	// check docker version compatibility. | ||||
| @@ -249,6 +250,11 @@ type dockerService struct { | ||||
| 	// version checking for some operations. Use this cache to avoid querying | ||||
| 	// the docker daemon every time we need to do such checks. | ||||
| 	versionCache *cache.ObjectCache | ||||
| 	// This option provides an escape hatch to override the new default behavior for Docker under | ||||
| 	// the CRI to use a shared PID namespace for all pods. It is temporary and will be removed. | ||||
| 	// See proposals/pod-pid-namespace.md for details. | ||||
| 	// TODO: Remove once the escape hatch is no longer used (https://issues.k8s.io/41938) | ||||
| 	disableSharedPID bool | ||||
| } | ||||
|  | ||||
| // Version returns the runtime name, runtime version and runtime API version | ||||
|   | ||||
| @@ -19,7 +19,9 @@ package dockershim | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
|  | ||||
| 	"github.com/blang/semver" | ||||
| 	dockercontainer "github.com/docker/engine-api/types/container" | ||||
|  | ||||
| 	"k8s.io/kubernetes/pkg/api/v1" | ||||
| @@ -123,6 +125,7 @@ func modifyContainerNamespaceOptions(nsOpts *runtimeapi.NamespaceOption, sandbox | ||||
| 	if nsOpts != nil { | ||||
| 		hostNetwork = nsOpts.HostNetwork | ||||
| 	} | ||||
| 	hostConfig.PidMode = dockercontainer.PidMode(fmt.Sprintf("container:%v", sandboxID)) | ||||
| 	modifyCommonNamespaceOptions(nsOpts, hostConfig) | ||||
| 	modifyHostNetworkOptionForContainer(hostNetwork, sandboxID, hostConfig) | ||||
| } | ||||
| @@ -172,3 +175,19 @@ func modifyHostNetworkOptionForContainer(hostNetwork bool, sandboxID string, hc | ||||
| 		hc.UTSMode = namespaceModeHost | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // modifyPIDNamespaceOverrides implements two temporary overrides for the default PID namespace sharing for Docker: | ||||
| //     1. Docker engine prior to API Version 1.24 doesn't support attaching to another container's | ||||
| //        PID namespace, and it didn't stabilize until 1.26. This check can be removed when Kubernetes' | ||||
| //        minimum Docker version is at least 1.13.1 (API version 1.26). | ||||
| //     2. The administrator has overridden the default behavior by means of a kubelet flag. This is an | ||||
| //        "escape hatch" to return to previous behavior of isolated namespaces and should be removed once | ||||
| //        no longer needed. | ||||
| // | ||||
| // Only a PidMode that starts with "container:" is ever changed; any other value (for example "host" | ||||
| // or the empty string) is left exactly as it was. When either override applies, hc.PidMode is reset | ||||
| // to the empty string. The function mutates hc in place and returns nothing. | ||||
| // | ||||
| // Parameters: | ||||
| //   - disableSharedPID: true when the shared PID namespace has been switched off (override 2). | ||||
| //   - version: the Docker API version that is compared against 1.26 (override 1). | ||||
| //   - hc: the Docker host config whose PidMode is inspected and possibly cleared. | ||||
| // | ||||
| // NOTE(review): hc is dereferenced unconditionally, and version is dereferenced whenever PidMode has | ||||
| // the "container:" prefix and disableSharedPID is false; presumably callers never pass nil for | ||||
| // either one -- confirm at the call sites. | ||||
| func modifyPIDNamespaceOverrides(disableSharedPID bool, version *semver.Version, hc *dockercontainer.HostConfig) { | ||||
| 	// Nothing to override unless the container is currently set to join another container's PID namespace. | ||||
| 	if !strings.HasPrefix(string(hc.PidMode), "container:") { | ||||
| 		return | ||||
| 	} | ||||
| 	// Either override is enough to drop the shared namespace. The version comparison is only | ||||
| 	// evaluated when disableSharedPID is false, because || short-circuits. | ||||
| 	if disableSharedPID || version.LT(semver.Version{Major: 1, Minor: 26}) { | ||||
| 		hc.PidMode = "" | ||||
| 	} | ||||
| } | ||||
|   | ||||
| @@ -21,6 +21,7 @@ import ( | ||||
| 	"strconv" | ||||
| 	"testing" | ||||
|  | ||||
| 	"github.com/blang/semver" | ||||
| 	dockercontainer "github.com/docker/engine-api/types/container" | ||||
| 	"github.com/stretchr/testify/assert" | ||||
|  | ||||
| @@ -172,12 +173,14 @@ func TestModifyHostConfigAndNamespaceOptionsForContainer(t *testing.T) { | ||||
| 		Privileged:  true, | ||||
| 		IpcMode:     dockercontainer.IpcMode(sandboxNSMode), | ||||
| 		NetworkMode: dockercontainer.NetworkMode(sandboxNSMode), | ||||
| 		PidMode:     dockercontainer.PidMode(sandboxNSMode), | ||||
| 	} | ||||
| 	setCapsHC := &dockercontainer.HostConfig{ | ||||
| 		CapAdd:      []string{"addCapA", "addCapB"}, | ||||
| 		CapDrop:     []string{"dropCapA", "dropCapB"}, | ||||
| 		IpcMode:     dockercontainer.IpcMode(sandboxNSMode), | ||||
| 		NetworkMode: dockercontainer.NetworkMode(sandboxNSMode), | ||||
| 		PidMode:     dockercontainer.PidMode(sandboxNSMode), | ||||
| 	} | ||||
| 	setSELinuxHC := &dockercontainer.HostConfig{ | ||||
| 		SecurityOpt: []string{ | ||||
| @@ -188,6 +191,7 @@ func TestModifyHostConfigAndNamespaceOptionsForContainer(t *testing.T) { | ||||
| 		}, | ||||
| 		IpcMode:     dockercontainer.IpcMode(sandboxNSMode), | ||||
| 		NetworkMode: dockercontainer.NetworkMode(sandboxNSMode), | ||||
| 		PidMode:     dockercontainer.PidMode(sandboxNSMode), | ||||
| 	} | ||||
|  | ||||
| 	cases := []struct { | ||||
| @@ -286,6 +290,7 @@ func TestModifyContainerNamespaceOptions(t *testing.T) { | ||||
| 				NetworkMode: dockercontainer.NetworkMode(sandboxNSMode), | ||||
| 				IpcMode:     dockercontainer.IpcMode(sandboxNSMode), | ||||
| 				UTSMode:     namespaceModeHost, | ||||
| 				PidMode:     dockercontainer.PidMode(sandboxNSMode), | ||||
| 			}, | ||||
| 		}, | ||||
| 		{ | ||||
| @@ -296,6 +301,7 @@ func TestModifyContainerNamespaceOptions(t *testing.T) { | ||||
| 			expected: &dockercontainer.HostConfig{ | ||||
| 				NetworkMode: dockercontainer.NetworkMode(sandboxNSMode), | ||||
| 				IpcMode:     dockercontainer.IpcMode(sandboxNSMode), | ||||
| 				PidMode:     dockercontainer.PidMode(sandboxNSMode), | ||||
| 			}, | ||||
| 		}, | ||||
| 		{ | ||||
| @@ -317,6 +323,63 @@ func TestModifyContainerNamespaceOptions(t *testing.T) { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // TestModifyContainerNamespacePIDOverride is a table-driven test for modifyPIDNamespaceOverrides. | ||||
| // Each case seeds a HostConfig with the input PidMode, applies the overrides with the given | ||||
| // disable flag and version, and asserts that the resulting PidMode equals the expected value. | ||||
| func TestModifyContainerNamespacePIDOverride(t *testing.T) { | ||||
| 	cases := []struct { | ||||
| 		name            string | ||||
| 		disable         bool | ||||
| 		version         *semver.Version | ||||
| 		input, expected dockercontainer.PidMode | ||||
| 	}{ | ||||
| 		// Version exactly at the 1.26 threshold with the override off: the container mode is kept. | ||||
| 		{ | ||||
| 			name:     "SharedPID.Enable", | ||||
| 			disable:  false, | ||||
| 			version:  &semver.Version{Major: 1, Minor: 26}, | ||||
| 			input:    "container:sandbox", | ||||
| 			expected: "container:sandbox", | ||||
| 		}, | ||||
| 		// Override switched on: the container mode is cleared even though the version is new enough. | ||||
| 		{ | ||||
| 			name:     "SharedPID.Disable", | ||||
| 			disable:  true, | ||||
| 			version:  &semver.Version{Major: 1, Minor: 26}, | ||||
| 			input:    "container:sandbox", | ||||
| 			expected: "", | ||||
| 		}, | ||||
| 		// Version just below the threshold: the container mode is cleared without the override. | ||||
| 		{ | ||||
| 			name:     "SharedPID.OldDocker", | ||||
| 			disable:  false, | ||||
| 			version:  &semver.Version{Major: 1, Minor: 25}, | ||||
| 			input:    "container:sandbox", | ||||
| 			expected: "", | ||||
| 		}, | ||||
| 		// A PidMode without the "container:" prefix is never touched, even with the override on. | ||||
| 		{ | ||||
| 			name:     "SharedPID.HostPid", | ||||
| 			disable:  true, | ||||
| 			version:  &semver.Version{Major: 1, Minor: 27}, | ||||
| 			input:    "host", | ||||
| 			expected: "host", | ||||
| 		}, | ||||
| 		// A higher major version also counts as new enough, so the container mode is kept. | ||||
| 		{ | ||||
| 			name:     "SharedPID.DistantFuture", | ||||
| 			disable:  false, | ||||
| 			version:  &semver.Version{Major: 2, Minor: 10}, | ||||
| 			input:    "container:sandbox", | ||||
| 			expected: "container:sandbox", | ||||
| 		}, | ||||
| 		// An empty PidMode stays empty when both override conditions hold. | ||||
| 		{ | ||||
| 			name:     "SharedPID.EmptyPidMode", | ||||
| 			disable:  true, | ||||
| 			version:  &semver.Version{Major: 1, Minor: 25}, | ||||
| 			input:    "", | ||||
| 			expected: "", | ||||
| 		}, | ||||
| 	} | ||||
| 	for _, tc := range cases { | ||||
| 		// Only PidMode is populated; it is the only HostConfig field the function reads or writes. | ||||
| 		dockerCfg := &dockercontainer.HostConfig{PidMode: tc.input} | ||||
| 		modifyPIDNamespaceOverrides(tc.disable, tc.version, dockerCfg) | ||||
| 		assert.Equal(t, tc.expected, dockerCfg.PidMode, "[Test case %q]", tc.name) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func fullValidSecurityContext() *runtimeapi.LinuxContainerSecurityContext { | ||||
| 	return &runtimeapi.LinuxContainerSecurityContext{ | ||||
| 		Privileged:     true, | ||||
|   | ||||
| @@ -553,7 +553,8 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub | ||||
| 			// Create and start the CRI shim running as a grpc server. | ||||
| 			streamingConfig := getStreamingConfig(kubeCfg, kubeDeps) | ||||
| 			ds, err := dockershim.NewDockerService(klet.dockerClient, kubeCfg.SeccompProfileRoot, kubeCfg.PodInfraContainerImage, | ||||
| 				streamingConfig, &pluginSettings, kubeCfg.RuntimeCgroups, kubeCfg.CgroupDriver, dockerExecHandler, dockershimRootDir) | ||||
| 				streamingConfig, &pluginSettings, kubeCfg.RuntimeCgroups, kubeCfg.CgroupDriver, dockerExecHandler, dockershimRootDir, | ||||
| 				!kubeCfg.DockerEnableSharedPID) | ||||
| 			if err != nil { | ||||
| 				return nil, err | ||||
| 			} | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Lee Verberne
					Lee Verberne