From 0d71dc677e74f3bf42db578f6d88bf818cdeafeb Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Mon, 14 Oct 2024 17:47:02 +0200 Subject: [PATCH] Refactor CreateVolumeSpec Rename old CreateVolumeSpec to CreateVolumeSpecWithNodeMigration that extracts volume.Spec with node specific CSI migration. Add CreateVolumeSpec that does the same, only without evaluating node CSI migration. --- .../attachdetach/attach_detach_controller.go | 2 +- .../volume/attachdetach/metrics/metrics.go | 2 +- .../volume/attachdetach/util/util.go | 154 ++++---- .../volume/attachdetach/util/util_test.go | 2 +- .../cache/desired_state_of_world.go | 60 +--- pkg/volume/testing/testing.go | 3 +- pkg/volume/util/selinux.go | 109 +++++- pkg/volume/util/selinux_test.go | 333 ++++++++++++++++++ 8 files changed, 547 insertions(+), 118 deletions(-) create mode 100644 pkg/volume/util/selinux_test.go diff --git a/pkg/controller/volume/attachdetach/attach_detach_controller.go b/pkg/controller/volume/attachdetach/attach_detach_controller.go index 76eb4bf6ade..667804d1f55 100644 --- a/pkg/controller/volume/attachdetach/attach_detach_controller.go +++ b/pkg/controller/volume/attachdetach/attach_detach_controller.go @@ -437,7 +437,7 @@ func (adc *attachDetachController) populateDesiredStateOfWorld(logger klog.Logge // The volume specs present in the ActualStateOfWorld are nil, let's replace those // with the correct ones found on pods. The present in the ASW with no corresponding // pod will be detached and the spec is irrelevant. 
- volumeSpec, err := util.CreateVolumeSpec(logger, podVolume, podToAdd, nodeName, &adc.volumePluginMgr, adc.pvcLister, adc.pvLister, adc.csiMigratedPluginManager, adc.intreeToCSITranslator) + volumeSpec, err := util.CreateVolumeSpecWithNodeMigration(logger, podVolume, podToAdd, nodeName, &adc.volumePluginMgr, adc.pvcLister, adc.pvLister, adc.csiMigratedPluginManager, adc.intreeToCSITranslator) if err != nil { logger.Error( err, diff --git a/pkg/controller/volume/attachdetach/metrics/metrics.go b/pkg/controller/volume/attachdetach/metrics/metrics.go index f0c487a7a2b..709ef9d124d 100644 --- a/pkg/controller/volume/attachdetach/metrics/metrics.go +++ b/pkg/controller/volume/attachdetach/metrics/metrics.go @@ -181,7 +181,7 @@ func (collector *attachDetachStateCollector) getVolumeInUseCount(logger klog.Log continue } for _, podVolume := range pod.Spec.Volumes { - volumeSpec, err := util.CreateVolumeSpec(logger, podVolume, pod, types.NodeName(pod.Spec.NodeName), collector.volumePluginMgr, collector.pvcLister, collector.pvLister, collector.csiMigratedPluginManager, collector.intreeToCSITranslator) + volumeSpec, err := util.CreateVolumeSpecWithNodeMigration(logger, podVolume, pod, types.NodeName(pod.Spec.NodeName), collector.volumePluginMgr, collector.pvcLister, collector.pvLister, collector.csiMigratedPluginManager, collector.intreeToCSITranslator) if err != nil { continue } diff --git a/pkg/controller/volume/attachdetach/util/util.go b/pkg/controller/volume/attachdetach/util/util.go index b075382e2d3..a47b431b606 100644 --- a/pkg/controller/volume/attachdetach/util/util.go +++ b/pkg/controller/volume/attachdetach/util/util.go @@ -33,12 +33,7 @@ import ( "k8s.io/kubernetes/pkg/volume/util" ) -// CreateVolumeSpec creates and returns a mutatable volume.Spec object for the -// specified volume. It dereference any PVC to get PV objects, if needed. 
-// A volume.Spec that refers to an in-tree plugin spec is translated to refer -// to a migrated CSI plugin spec if all conditions for CSI migration on a node -// for the in-tree plugin is satisfied. -func CreateVolumeSpec(logger klog.Logger, podVolume v1.Volume, pod *v1.Pod, nodeName types.NodeName, vpm *volume.VolumePluginMgr, pvcLister corelisters.PersistentVolumeClaimLister, pvLister corelisters.PersistentVolumeLister, csiMigratedPluginManager csimigration.PluginManager, csiTranslator csimigration.InTreeToCSITranslator) (*volume.Spec, error) { +func createInTreeVolumeSpec(logger klog.Logger, podVolume *v1.Volume, pod *v1.Pod, vpm *volume.VolumePluginMgr, pvcLister corelisters.PersistentVolumeClaimLister, pvLister corelisters.PersistentVolumeLister, csiMigratedPluginManager csimigration.PluginManager, csiTranslator csimigration.InTreeToCSITranslator) (*volume.Spec, string, error) { claimName := "" readOnly := false if pvcSource := podVolume.VolumeSource.PersistentVolumeClaim; pvcSource != nil { @@ -47,67 +42,83 @@ func CreateVolumeSpec(logger klog.Logger, podVolume v1.Volume, pod *v1.Pod, node } isEphemeral := podVolume.VolumeSource.Ephemeral != nil if isEphemeral { - claimName = ephemeral.VolumeClaimName(pod, &podVolume) + claimName = ephemeral.VolumeClaimName(pod, podVolume) } - if claimName != "" { - logger.V(10).Info("Found PVC", "PVC", klog.KRef(pod.Namespace, claimName)) - - // If podVolume is a PVC, fetch the real PV behind the claim - pvc, err := getPVCFromCache(pod.Namespace, claimName, pvcLister) - if err != nil { - return nil, fmt.Errorf( - "error processing PVC %q/%q: %v", - pod.Namespace, - claimName, - err) - } - if isEphemeral { - if err := ephemeral.VolumeIsForPod(pod, pvc); err != nil { - return nil, err - } - } - - pvName, pvcUID := pvc.Spec.VolumeName, pvc.UID - logger.V(10).Info("Found bound PV for PVC", "PVC", klog.KRef(pod.Namespace, claimName), "pvcUID", pvcUID, "PV", klog.KRef("", pvName)) - - // Fetch actual PV object - volumeSpec, err 
:= getPVSpecFromCache( - pvName, readOnly, pvcUID, pvLister) - if err != nil { - return nil, fmt.Errorf( - "error processing PVC %q/%q: %v", - pod.Namespace, - claimName, - err) - } - - volumeSpec, err = translateInTreeSpecToCSIIfNeeded(logger, volumeSpec, nodeName, vpm, csiMigratedPluginManager, csiTranslator, pod.Namespace) - if err != nil { - return nil, fmt.Errorf( - "error performing CSI migration checks and translation for PVC %q/%q: %v", - pod.Namespace, - claimName, - err) - } - - logger.V(10).Info("Extracted volumeSpec from bound PV and PVC", "PVC", klog.KRef(pod.Namespace, claimName), "pvcUID", pvcUID, "PV", klog.KRef("", pvName), "volumeSpecName", volumeSpec.Name()) - - return volumeSpec, nil + if claimName == "" { + // In-line volume + return volume.NewSpecFromVolume(podVolume), "", nil } + // The volume is a PVC, dereference the PVC + PV + logger.V(10).Info("Found PVC", "PVC", klog.KRef(pod.Namespace, claimName)) - // Do not return the original volume object, since it's from the shared - // informer it may be mutated by another consumer. 
- clonedPodVolume := podVolume.DeepCopy() - - origspec := volume.NewSpecFromVolume(clonedPodVolume) - spec, err := translateInTreeSpecToCSIIfNeeded(logger, origspec, nodeName, vpm, csiMigratedPluginManager, csiTranslator, pod.Namespace) + // If podVolume is a PVC, fetch the real PV behind the claim + pvc, err := getPVCFromCache(pod.Namespace, claimName, pvcLister) if err != nil { - return nil, fmt.Errorf( - "error performing CSI migration checks and translation for inline volume %q: %v", - podVolume.Name, + return nil, claimName, fmt.Errorf( + "error processing PVC %q/%q: %v", + pod.Namespace, + claimName, err) } - return spec, nil + if isEphemeral { + if err := ephemeral.VolumeIsForPod(pod, pvc); err != nil { + return nil, claimName, err + } + } + + pvName, pvcUID := pvc.Spec.VolumeName, pvc.UID + logger.V(10).Info("Found bound PV for PVC", "PVC", klog.KRef(pod.Namespace, claimName), "pvcUID", pvcUID, "PV", klog.KRef("", pvName)) + + // Fetch actual PV object + volumeSpec, err := getPVSpecFromCache( + pvName, readOnly, pvcUID, pvLister) + if err != nil { + return nil, claimName, fmt.Errorf( + "error processing PVC %q/%q: %v", + pod.Namespace, + claimName, + err) + } + + logger.V(10).Info("Extracted volumeSpec from bound PV and PVC", "PVC", klog.KRef(pod.Namespace, claimName), "pvcUID", pvcUID, "PV", klog.KRef("", pvName), "volumeSpecName", volumeSpec.Name()) + return volumeSpec, claimName, nil +} + +func CreateVolumeSpec(logger klog.Logger, podVolume v1.Volume, pod *v1.Pod, vpm *volume.VolumePluginMgr, pvcLister corelisters.PersistentVolumeClaimLister, pvLister corelisters.PersistentVolumeLister, csiMigratedPluginManager csimigration.PluginManager, csiTranslator csimigration.InTreeToCSITranslator) (*volume.Spec, error) { + volumeSpec, claimName, err := createInTreeVolumeSpec(logger, &podVolume, pod, vpm, pvcLister, pvLister, csiMigratedPluginManager, csiTranslator) + if err != nil { + return nil, err + } + volumeSpec, err = translateInTreeSpecToCSIIfNeeded(logger, 
volumeSpec, vpm, csiMigratedPluginManager, csiTranslator, pod.Namespace) + if err != nil { + return nil, fmt.Errorf( + "error performing CSI migration checks and translation for PVC %q/%q: %v", + pod.Namespace, + claimName, + err) + } + return volumeSpec, nil +} + +// CreateVolumeSpecWithNodeMigration creates and returns a mutable volume.Spec object for the +// specified volume. It dereferences any PVC to get PV objects, if needed. +// A volume.Spec that refers to an in-tree plugin spec is translated to refer +// to a migrated CSI plugin spec if all conditions for CSI migration on a node +// for the in-tree plugin are satisfied. +func CreateVolumeSpecWithNodeMigration(logger klog.Logger, podVolume v1.Volume, pod *v1.Pod, nodeName types.NodeName, vpm *volume.VolumePluginMgr, pvcLister corelisters.PersistentVolumeClaimLister, pvLister corelisters.PersistentVolumeLister, csiMigratedPluginManager csimigration.PluginManager, csiTranslator csimigration.InTreeToCSITranslator) (*volume.Spec, error) { + volumeSpec, claimName, err := createInTreeVolumeSpec(logger, &podVolume, pod, vpm, pvcLister, pvLister, csiMigratedPluginManager, csiTranslator) + if err != nil { + return nil, err + } + volumeSpec, err = translateInTreeSpecToCSIOnNodeIfNeeded(logger, volumeSpec, nodeName, vpm, csiMigratedPluginManager, csiTranslator, pod.Namespace) + if err != nil { + return nil, fmt.Errorf( + "error performing CSI migration checks and translation for PVC %q/%q: %v", + pod.Namespace, + claimName, + err) + } + return volumeSpec, nil } // getPVCFromCache fetches the PVC object with the given namespace and @@ -144,7 +155,6 @@ func getPVSpecFromCache(name string, pvcReadOnly bool, expectedClaimUID types.UI if err != nil { return nil, fmt.Errorf("failed to find PV %q in PVInformer cache: %v", name, err) } - if pv.Spec.ClaimRef == nil { return nil, fmt.Errorf( "found PV object %q but it has a nil pv.Spec.ClaimRef indicating it is not yet bound to the claim", @@ -204,7 +214,7 @@ func ProcessPodVolumes(logger 
klog.Logger, pod *v1.Pod, addVolumes bool, desired // Process volume spec for each volume defined in pod for _, podVolume := range pod.Spec.Volumes { - volumeSpec, err := CreateVolumeSpec(logger, podVolume, pod, nodeName, volumePluginMgr, pvcLister, pvLister, csiMigratedPluginManager, csiTranslator) + volumeSpec, err := CreateVolumeSpecWithNodeMigration(logger, podVolume, pod, nodeName, volumePluginMgr, pvcLister, pvLister, csiMigratedPluginManager, csiTranslator) if err != nil { logger.V(10).Info("Error processing volume for pod", "pod", klog.KObj(pod), "volumeName", podVolume.Name, "err", err) continue @@ -240,7 +250,7 @@ func ProcessPodVolumes(logger klog.Logger, pod *v1.Pod, addVolumes bool, desired } } -func translateInTreeSpecToCSIIfNeeded(logger klog.Logger, spec *volume.Spec, nodeName types.NodeName, vpm *volume.VolumePluginMgr, csiMigratedPluginManager csimigration.PluginManager, csiTranslator csimigration.InTreeToCSITranslator, podNamespace string) (*volume.Spec, error) { +func translateInTreeSpecToCSIOnNodeIfNeeded(logger klog.Logger, spec *volume.Spec, nodeName types.NodeName, vpm *volume.VolumePluginMgr, csiMigratedPluginManager csimigration.PluginManager, csiTranslator csimigration.InTreeToCSITranslator, podNamespace string) (*volume.Spec, error) { translatedSpec := spec migratable, err := csiMigratedPluginManager.IsMigratable(spec) if err != nil { @@ -263,6 +273,22 @@ func translateInTreeSpecToCSIIfNeeded(logger klog.Logger, spec *volume.Spec, nod return translatedSpec, nil } +func translateInTreeSpecToCSIIfNeeded(logger klog.Logger, spec *volume.Spec, vpm *volume.VolumePluginMgr, csiMigratedPluginManager csimigration.PluginManager, csiTranslator csimigration.InTreeToCSITranslator, podNamespace string) (*volume.Spec, error) { + migratable, err := csiMigratedPluginManager.IsMigratable(spec) + if err != nil { + return nil, err + } + if !migratable { + // Return the spec untranslated when the spec itself is not migratable + 
return spec, nil + } + translatedSpec, err := csimigration.TranslateInTreeSpecToCSI(logger, spec, podNamespace, csiTranslator) + if err != nil { + return nil, err + } + return translatedSpec, nil +} + func isCSIMigrationSupportedOnNode(nodeName types.NodeName, spec *volume.Spec, vpm *volume.VolumePluginMgr, csiMigratedPluginManager csimigration.PluginManager) (bool, error) { pluginName, err := csiMigratedPluginManager.GetInTreePluginNameFromSpec(spec.PersistentVolume, spec.Volume) if err != nil { diff --git a/pkg/controller/volume/attachdetach/util/util_test.go b/pkg/controller/volume/attachdetach/util/util_test.go index 7d5b8c9e8db..d14e2e3b89a 100644 --- a/pkg/controller/volume/attachdetach/util/util_test.go +++ b/pkg/controller/volume/attachdetach/util/util_test.go @@ -243,7 +243,7 @@ func Test_CreateVolumeSpec(t *testing.T) { t.Run(test.desc, func(t *testing.T) { logger, _ := ktesting.NewTestContext(t) plugMgr, intreeToCSITranslator, csiTranslator, pvLister, pvcLister := setup(testNodeName, t) - actualSpec, err := CreateVolumeSpec(logger, test.pod.Spec.Volumes[0], test.pod, test.createNodeName, plugMgr, pvcLister, pvLister, intreeToCSITranslator, csiTranslator) + actualSpec, err := CreateVolumeSpecWithNodeMigration(logger, test.pod.Spec.Volumes[0], test.pod, test.createNodeName, plugMgr, pvcLister, pvLister, intreeToCSITranslator, csiTranslator) if actualSpec == nil && (test.wantPersistentVolume != nil || test.wantVolume != nil) { t.Errorf("got volume spec is nil") diff --git a/pkg/kubelet/volumemanager/cache/desired_state_of_world.go b/pkg/kubelet/volumemanager/cache/desired_state_of_world.go index f59b58c4cfc..6daa49693f6 100644 --- a/pkg/kubelet/volumemanager/cache/desired_state_of_world.go +++ b/pkg/kubelet/volumemanager/cache/desired_state_of_world.go @@ -397,63 +397,31 @@ func (dsw *desiredStateOfWorld) AddPodToVolume( // It returns error if the SELinux label cannot be constructed or when the volume is used with multiple SELinux // labels. 
func (dsw *desiredStateOfWorld) getSELinuxLabel(volumeSpec *volume.Spec, seLinuxContainerContexts []*v1.SELinuxOptions, podSecurityContext *v1.PodSecurityContext) (seLinuxFileLabel string, pluginSupportsSELinuxContextMount bool, err error) { - if !dsw.seLinuxTranslator.SELinuxEnabled() { - return "", false, nil - } - - pluginSupportsSELinuxContextMount, err = dsw.getSELinuxMountSupport(volumeSpec) + labelInfo, err := util.GetMountSELinuxLabel(volumeSpec, seLinuxContainerContexts, podSecurityContext, dsw.volumePluginMgr, dsw.seLinuxTranslator) if err != nil { - return "", false, err - } + accessMode := getVolumeAccessMode(volumeSpec) + seLinuxSupported := util.VolumeSupportsSELinuxMount(volumeSpec) - if feature.DefaultFeatureGate.Enabled(features.SELinuxChangePolicy) && - podSecurityContext != nil && - podSecurityContext.SELinuxChangePolicy != nil && - *podSecurityContext.SELinuxChangePolicy == v1.SELinuxChangePolicyRecursive { - // The pod has opted into recursive SELinux label changes. Do not mount with -o context. - return "", pluginSupportsSELinuxContextMount, nil - } - - if !pluginSupportsSELinuxContextMount { - return "", pluginSupportsSELinuxContextMount, nil - } - - seLinuxSupported := util.VolumeSupportsSELinuxMount(volumeSpec) - // Ensure that a volume that can be mounted with "-o context=XYZ" is - // used only by containers with the same SELinux contexts. 
- for _, containerContext := range seLinuxContainerContexts { - newLabel, err := dsw.seLinuxTranslator.SELinuxOptionsToFileLabel(containerContext) - if err != nil { - fullErr := fmt.Errorf("failed to construct SELinux label from context %q: %w", containerContext, err) - accessMode := getVolumeAccessMode(volumeSpec) + if util.IsSELinuxLabelTranslationError(err) { err := handleSELinuxMetricError( - fullErr, + err, seLinuxSupported, seLinuxContainerContextWarnings.WithLabelValues(accessMode), seLinuxContainerContextErrors.WithLabelValues(accessMode)) - if err != nil { - return "", false, err - } + return "", labelInfo.PluginSupportsSELinuxContextMount, err } - if seLinuxFileLabel == "" { - seLinuxFileLabel = newLabel - continue - } - if seLinuxFileLabel != newLabel { - accessMode := getVolumeAccessMode(volumeSpec) - - fullErr := fmt.Errorf("volume %s is used with two different SELinux contexts in the same pod: %q, %q", volumeSpec.Name(), seLinuxFileLabel, newLabel) + if util.IsMultipleSELinuxLabelsError(err) { err := handleSELinuxMetricError( - fullErr, + err, seLinuxSupported, seLinuxPodContextMismatchWarnings.WithLabelValues(accessMode), seLinuxPodContextMismatchErrors.WithLabelValues(accessMode)) - if err != nil { - return "", false, err - } + return "", false, err } + return "", labelInfo.PluginSupportsSELinuxContextMount, err } - return seLinuxFileLabel, pluginSupportsSELinuxContextMount, nil + + return labelInfo.SELinuxMountLabel, labelInfo.PluginSupportsSELinuxContextMount, nil } func (dsw *desiredStateOfWorld) MarkVolumesReportedInUse( @@ -668,10 +636,6 @@ func (dsw *desiredStateOfWorld) MarkVolumeAttachability(volumeName v1.UniqueVolu dsw.volumesToMount[volumeName] = volumeObj } -func (dsw *desiredStateOfWorld) getSELinuxMountSupport(volumeSpec *volume.Spec) (bool, error) { - return util.SupportsSELinuxContextMount(volumeSpec, dsw.volumePluginMgr) -} - // Based on isRWOP, bump the right warning / error metric and either consume the error or return it. 
func handleSELinuxMetricError(err error, seLinuxSupported bool, warningMetric, errorMetric metrics.GaugeMetric) error { if seLinuxSupported { diff --git a/pkg/volume/testing/testing.go b/pkg/volume/testing/testing.go index 30c4fa730b9..fd1777c5a1f 100644 --- a/pkg/volume/testing/testing.go +++ b/pkg/volume/testing/testing.go @@ -40,7 +40,6 @@ import ( "k8s.io/apimachinery/pkg/util/uuid" utiltesting "k8s.io/client-go/util/testing" "k8s.io/kubernetes/pkg/volume" - "k8s.io/kubernetes/pkg/volume/util" "k8s.io/kubernetes/pkg/volume/util/recyclerclient" volumetypes "k8s.io/kubernetes/pkg/volume/util/types" "k8s.io/kubernetes/pkg/volume/util/volumepathhandler" @@ -1188,7 +1187,7 @@ func (fc *FakeProvisioner) Provision(selectedNode *v1.Node, allowedTopologies [] ObjectMeta: metav1.ObjectMeta{ Name: fc.Options.PVName, Annotations: map[string]string{ - util.VolumeDynamicallyCreatedByKey: "fakeplugin-provisioner", + "kubernetes.io/createdby": "fakeplugin-provisioner", }, }, Spec: v1.PersistentVolumeSpec{ diff --git a/pkg/volume/util/selinux.go b/pkg/volume/util/selinux.go index 9f567dbef74..d1a088cc4a2 100644 --- a/pkg/volume/util/selinux.go +++ b/pkg/volume/util/selinux.go @@ -17,11 +17,14 @@ limitations under the License. package util import ( + "errors" "fmt" + "strings" "github.com/opencontainers/selinux/go-selinux" "github.com/opencontainers/selinux/go-selinux/label" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" utilfeature "k8s.io/apiserver/pkg/util/feature" v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" "k8s.io/kubernetes/pkg/features" @@ -71,7 +74,7 @@ func (l *translator) SELinuxOptionsToFileLabel(opts *v1.SELinuxOptions) (string, if err != nil { // In theory, this should be unreachable. InitLabels can fail only when args contain an unknown option, // and all options returned by contextOptions are known. 
- return "", err + return "", &SELinuxLabelTranslationError{msg: err.Error()} } // InitLabels() may allocate a new unique SELinux label in kubelet memory. The label is *not* allocated // in the container runtime. Clear it to avoid memory problems. @@ -156,6 +159,19 @@ func (l *fakeTranslator) SELinuxEnabled() bool { return true } +type SELinuxLabelTranslationError struct { + msg string +} + +func (e *SELinuxLabelTranslationError) Error() string { + return e.msg +} + +func IsSELinuxLabelTranslationError(err error) bool { + var seLinuxError *SELinuxLabelTranslationError + return errors.As(err, &seLinuxError) +} + // SupportsSELinuxContextMount checks if the given volumeSpec supports with mount -o context func SupportsSELinuxContextMount(volumeSpec *volume.Spec, volumePluginMgr *volume.VolumePluginMgr) (bool, error) { plugin, _ := volumePluginMgr.FindPluginBySpec(volumeSpec) @@ -191,6 +207,24 @@ func VolumeSupportsSELinuxMount(volumeSpec *volume.Spec) bool { return true } +// MultipleSELinuxLabelsError tells that one volume in a pod is mounted in multiple containers and each has a different SELinux label. 
+type MultipleSELinuxLabelsError struct { + labels []string +} + +func (e *MultipleSELinuxLabelsError) Error() string { + return fmt.Sprintf("multiple SELinux labels found: %s", strings.Join(e.labels, ",")) +} + +func (e *MultipleSELinuxLabelsError) Labels() []string { + return e.labels +} + +func IsMultipleSELinuxLabelsError(err error) bool { + var multiError *MultipleSELinuxLabelsError + return errors.As(err, &multiError) +} + // AddSELinuxMountOption adds -o context="XYZ" mount option to a given list func AddSELinuxMountOption(options []string, seLinuxContext string) []string { if !utilfeature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) { @@ -200,3 +234,76 @@ func AddSELinuxMountOption(options []string, seLinuxContext string) []string { // For example: dirsync,context="system_u:object_r:container_file_t:s0:c15,c25",noatime return append(options, fmt.Sprintf("context=%q", seLinuxContext)) } + +// SELinuxLabelInfo contains information about SELinux labels that should be used to mount a volume for a Pod. +type SELinuxLabelInfo struct { + // SELinuxMountLabel is the SELinux label that should be used to mount the volume. + // It is set only when the volume plugin supports SELinuxMount and the Pod did not opt out via SELinuxChangePolicy. + // Empty string otherwise. + SELinuxMountLabel string + // SELinuxProcessLabel is the SELinux label that the container runtime will use for the Pod. + // It is set regardless of whether the volume plugin supports SELinuxMount or the Pod opted out via SELinuxChangePolicy. + SELinuxProcessLabel string + // PluginSupportsSELinuxContextMount is true if the volume plugin supports SELinux mount. + PluginSupportsSELinuxContextMount bool +} + +// GetMountSELinuxLabel returns SELinux labels that should be used to mount the given volumeSpec in a Pod with the given podSecurityContext. +// It does not evaluate the volume access mode! 
It's up to the caller to check SELinuxMount feature gate, +// it may need to bump different metrics based on feature gates / access modes / label anyway. +func GetMountSELinuxLabel(volumeSpec *volume.Spec, seLinuxContainerContexts []*v1.SELinuxOptions, podSecurityContext *v1.PodSecurityContext, volumePluginMgr *volume.VolumePluginMgr, seLinuxTranslator SELinuxLabelTranslator) (SELinuxLabelInfo, error) { + info := SELinuxLabelInfo{} + if !utilfeature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) { + return info, nil + } + + if !seLinuxTranslator.SELinuxEnabled() { + return info, nil + } + + pluginSupportsSELinuxContextMount, err := SupportsSELinuxContextMount(volumeSpec, volumePluginMgr) + if err != nil { + return info, err + } + + info.PluginSupportsSELinuxContextMount = pluginSupportsSELinuxContextMount + + // Collect all SELinux options from all containers that use this volume. + labels := sets.New[string]() + for _, containerContext := range seLinuxContainerContexts { + lbl, err := seLinuxTranslator.SELinuxOptionsToFileLabel(containerContext) + if err != nil { + fullErr := fmt.Errorf("failed to construct SELinux label from context %q: %w", containerContext, err) + return info, fullErr + } + labels.Insert(lbl) + } + + // Ensure that all containers use the same SELinux label. + if labels.Len() > 1 { + // This volume is used with more than one SELinux label in the pod. + return info, &MultipleSELinuxLabelsError{labels: labels.UnsortedList()} + } + if labels.Len() == 0 { + return info, nil + } + + lbl, _ := labels.PopAny() + info.SELinuxProcessLabel = lbl + info.SELinuxMountLabel = lbl + + if utilfeature.DefaultFeatureGate.Enabled(features.SELinuxChangePolicy) && + podSecurityContext != nil && + podSecurityContext.SELinuxChangePolicy != nil && + *podSecurityContext.SELinuxChangePolicy == v1.SELinuxChangePolicyRecursive { + // The pod has opted into recursive SELinux label changes. Do not mount with -o context. 
+ info.SELinuxMountLabel = "" + } + + if !pluginSupportsSELinuxContextMount { + // The volume plugin does not support SELinux mount. Do not mount with -o context. + info.SELinuxMountLabel = "" + } + + return info, nil +} diff --git a/pkg/volume/util/selinux_test.go b/pkg/volume/util/selinux_test.go new file mode 100644 index 00000000000..1e9a389805c --- /dev/null +++ b/pkg/volume/util/selinux_test.go @@ -0,0 +1,333 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util + +import ( + "testing" + + v1 "k8s.io/api/core/v1" + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/component-base/featuregate" + featuregatetesting "k8s.io/component-base/featuregate/testing" + "k8s.io/kubernetes/pkg/features" + "k8s.io/kubernetes/pkg/volume" + volumetesting "k8s.io/kubernetes/pkg/volume/testing" + "k8s.io/utils/ptr" +) + +func TestGetMountSELinuxLabel(t *testing.T) { + pvRWOP := &v1.PersistentVolume{ + Spec: v1.PersistentVolumeSpec{ + AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadWriteOncePod}, + PersistentVolumeSource: v1.PersistentVolumeSource{ + HostPath: &v1.HostPathVolumeSource{}, + }, + }, + } + pvRWX := &v1.PersistentVolume{ + Spec: v1.PersistentVolumeSpec{ + AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadWriteMany}, + PersistentVolumeSource: v1.PersistentVolumeSource{ + HostPath: &v1.HostPathVolumeSource{}, + }, + }, + } + + seLinuxOpts1 := v1.SELinuxOptions{ + Level: "s0:c123,c456", + } + seLinuxOpts2 := 
v1.SELinuxOptions{ + Level: "s0:c234,c567", + } + seLinuxOpts3 := v1.SELinuxOptions{ + Level: "s0:c345,c678", + } + label1 := "system_u:object_r:container_file_t:s0:c123,c456" + + tests := []struct { + name string + featureGates []featuregate.Feature // SELinuxMountReadWriteOncePod is always enabled + pluginSupportsSELinux bool + volume *volume.Spec + podSecurityContext *v1.PodSecurityContext + seLinuxOptions []*v1.SELinuxOptions + expectError bool + expectedInfo SELinuxLabelInfo + }{ + // Tests with no labels + { + name: "no label, no changePolicy", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: nil, + seLinuxOptions: nil, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: "", // no SELinuxOptions + the default policy is recursive + SELinuxProcessLabel: "", // no SELinuxOptions + PluginSupportsSELinuxContextMount: true, + }, + }, + { + name: "no label, Recursive change policy", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: &v1.PodSecurityContext{SELinuxChangePolicy: ptr.To(v1.SELinuxChangePolicyRecursive)}, + seLinuxOptions: nil, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: "", // no SELinuxOptions + recursive policy + SELinuxProcessLabel: "", // SELinuxOptions + PluginSupportsSELinuxContextMount: true, + }, + }, + // Tests with one label and RWOP volume + { + name: "one label, Recursive change policy, no feature gate", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: &v1.PodSecurityContext{SELinuxChangePolicy: ptr.To(v1.SELinuxChangePolicyRecursive)}, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: label1, // Recursive policy is not observed when SELinuxChangePolicy is off + SELinuxProcessLabel: 
label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + { + name: "one label, Recursive change policy, SELinuxChangePolicy", + featureGates: []featuregate.Feature{features.SELinuxChangePolicy}, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: &v1.PodSecurityContext{SELinuxChangePolicy: ptr.To(v1.SELinuxChangePolicyRecursive)}, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: "", // Recursive policy is effective with SELinuxChangePolicy, affects RWOP too. + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + { + name: "one label, no policy", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: nil, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: label1, // The default policy is MountOption + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + { + name: "one label, MountOption policy", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: &v1.PodSecurityContext{SELinuxChangePolicy: ptr.To(v1.SELinuxChangePolicyMountOption)}, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: label1, // SELinuxChangePolicy feature is disabled, but the default policy is MountOption anyway + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + // Tests with RWX volume + { + name: "one label, no policy, RWX", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWX}, + podSecurityContext: nil, + 
seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: label1, // GetMountSELinuxLabel() does not check the access mode, it's up to the caller + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + { + name: "one label, no policy, RWX, SELinuxChangePolicy", + featureGates: []featuregate.Feature{features.SELinuxChangePolicy}, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWX}, + podSecurityContext: nil, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: label1, // GetMountSELinuxLabel() does not check the access mode, it's up to the caller + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + { + name: "one label, MountOption policy, RWX, SELinuxChangePolicy", + featureGates: []featuregate.Feature{features.SELinuxChangePolicy}, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWX}, + podSecurityContext: &v1.PodSecurityContext{SELinuxChangePolicy: ptr.To(v1.SELinuxChangePolicyMountOption)}, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: label1, // GetMountSELinuxLabel() does not check the access mode, it's up to the caller + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + { + name: "one label, no policy, RWX, SELinuxMount", + featureGates: []featuregate.Feature{features.SELinuxChangePolicy, features.SELinuxMount}, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWX}, + podSecurityContext: nil, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: label1, // SELinuxMount FG + MountOption policy + 
SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + // No plugin support + { + name: "one label, Recursive change policy, SELinuxChangePolicy, no plugin support", + featureGates: []featuregate.Feature{features.SELinuxChangePolicy}, + pluginSupportsSELinux: false, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: &v1.PodSecurityContext{SELinuxChangePolicy: ptr.To(v1.SELinuxChangePolicyRecursive)}, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: "", // No plugin support + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: false, + }, + }, + { + name: "one label, no policy, no plugin support", + featureGates: nil, + pluginSupportsSELinux: false, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: nil, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: "", // No plugin support + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: false, + }, + }, + { + name: "one label, MountOption policy, no plugin support", + featureGates: nil, + pluginSupportsSELinux: false, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: &v1.PodSecurityContext{SELinuxChangePolicy: ptr.To(v1.SELinuxChangePolicyMountOption)}, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: "", // No plugin support + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: false, + }, + }, + // Corner cases + { + name: "multiple same labels, no policy", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: nil, + seLinuxOptions: 
[]*v1.SELinuxOptions{&seLinuxOpts1, &seLinuxOpts1, &seLinuxOpts1, &seLinuxOpts1}, + expectError: false, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: label1, // The default policy is MountOption + SELinuxProcessLabel: label1, // Pod has a label assigned + PluginSupportsSELinuxContextMount: true, + }, + }, + // Error cases + { + name: "multiple different labels, no policy", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: nil, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1, &seLinuxOpts2, &seLinuxOpts3}, + expectError: true, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: "", + SELinuxProcessLabel: "", + PluginSupportsSELinuxContextMount: true, + }, + }, + { + name: "multiple different labels, Recursive policy", + featureGates: nil, + pluginSupportsSELinux: true, + volume: &volume.Spec{PersistentVolume: pvRWOP}, + podSecurityContext: &v1.PodSecurityContext{SELinuxChangePolicy: ptr.To(v1.SELinuxChangePolicyRecursive)}, + seLinuxOptions: []*v1.SELinuxOptions{&seLinuxOpts1, &seLinuxOpts2, &seLinuxOpts3}, + expectError: true, + expectedInfo: SELinuxLabelInfo{ + SELinuxMountLabel: "", + SELinuxProcessLabel: "", + PluginSupportsSELinuxContextMount: true, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Arrange + for _, fg := range tt.featureGates { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, fg, true) + } + seLinuxTranslator := NewFakeSELinuxLabelTranslator() + pluginMgr, plugin := volumetesting.GetTestKubeletVolumePluginMgr(t) + plugin.SupportsSELinux = tt.pluginSupportsSELinux + + // Act + info, err := GetMountSELinuxLabel(tt.volume, tt.seLinuxOptions, tt.podSecurityContext, pluginMgr, seLinuxTranslator) + + // Assert + if err != nil { + if !tt.expectError { + t.Errorf("GetMountSELinuxLabel() unexpected error: %v", err) + } + return + } + if tt.expectError { + t.Errorf("GetMountSELinuxLabel() 
expected error, got none") + return + } + + if info != tt.expectedInfo { + t.Errorf("GetMountSELinuxLabel() expected %+v, got %+v", tt.expectedInfo, info) + } + }) + } +}