Mirror of https://github.com/optim-enterprises-bv/kubernetes.git (synced 2025-11-03 19:58:17 +00:00)
Update moby/runc dependencies
Signed-off-by: Davanum Srinivas <davanum@gmail.com>
vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go (generated, vendored; 68 lines changed)
@@ -801,54 +801,33 @@ func mknodDevice(dest string, node *devices.Device) error {
 	return os.Chown(dest, int(node.Uid), int(node.Gid))
 }
 
-// Get the parent mount point of directory passed in as argument. Also return
-// optional fields.
-func getParentMount(rootfs string) (string, string, error) {
-	mi, err := mountinfo.GetMounts(mountinfo.ParentsFilter(rootfs))
-	if err != nil {
-		return "", "", err
-	}
-	if len(mi) < 1 {
-		return "", "", fmt.Errorf("could not find parent mount of %s", rootfs)
-	}
-
-	// find the longest mount point
-	var idx, maxlen int
-	for i := range mi {
-		if len(mi[i].Mountpoint) > maxlen {
-			maxlen = len(mi[i].Mountpoint)
-			idx = i
-		}
-	}
-	return mi[idx].Mountpoint, mi[idx].Optional, nil
-}
-
-// Make parent mount private if it was shared
-func rootfsParentMountPrivate(rootfs string) error {
-	sharedMount := false
-
-	parentMount, optionalOpts, err := getParentMount(rootfs)
-	if err != nil {
-		return err
-	}
-
-	optsSplit := strings.Split(optionalOpts, " ")
-	for _, opt := range optsSplit {
-		if strings.HasPrefix(opt, "shared:") {
-			sharedMount = true
-			break
-		}
-	}
-
-	// Make parent mount PRIVATE if it was shared. It is needed for two
-	// reasons. First of all pivot_root() will fail if parent mount is
-	// shared. Secondly when we bind mount rootfs it will propagate to
-	// parent namespace and we don't want that to happen.
-	if sharedMount {
-		return mount("", parentMount, "", "", unix.MS_PRIVATE, "")
-	}
-
-	return nil
+// rootfsParentMountPrivate ensures rootfs parent mount is private.
+// This is needed for two reasons:
+// - pivot_root() will fail if parent mount is shared;
+// - when we bind mount rootfs, if its parent is not private, the new mount
+//   will propagate (leak!) to parent namespace and we don't want that.
+func rootfsParentMountPrivate(path string) error {
+	var err error
+	// Assuming path is absolute and clean (this is checked in
+	// libcontainer/validate). Any error other than EINVAL means we failed,
+	// and EINVAL means this is not a mount point, so traverse up until we
+	// find one.
+	for {
+		err = unix.Mount("", path, "", unix.MS_PRIVATE, "")
+		if err == nil {
+			return nil
+		}
+		if err != unix.EINVAL || path == "/" { //nolint:errorlint // unix errors are bare
+			break
+		}
+		path = filepath.Dir(path)
+	}
+	return &mountError{
+		op:     "remount-private",
+		target: path,
+		flags:  unix.MS_PRIVATE,
+		err:    err,
+	}
 }
 
 func prepareRoot(config *configs.Config) error {
@@ -860,9 +839,6 @@ func prepareRoot(config *configs.Config) error {
 		return err
 	}
 
-	// Make parent mount private to make sure following bind mount does
-	// not propagate in other namespaces. Also it will help with kernel
-	// check pass in pivot_root. (IS_SHARED(new_mnt->mnt_parent))
 	if err := rootfsParentMountPrivate(config.Rootfs); err != nil {
 		return err
 	}
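The rewrite above replaces the mountinfo-based getParentMount lookup with a bottom-up remount loop: instead of parsing /proc/self/mountinfo for the parent mount and scanning its optional fields for a "shared:" peer group, the new helper just attempts an MS_PRIVATE remount while walking up the path until one succeeds. A minimal standalone sketch of the same technique (an illustration, not the vendored code; assumes Linux, root privileges, and a placeholder path):

package main

import (
	"errors"
	"fmt"
	"path/filepath"

	"golang.org/x/sys/unix"
)

// makeParentPrivate walks up from path until it hits an actual mount point,
// then remounts it MS_PRIVATE. mount(2) returns EINVAL when the target is
// not a mount point, which is what drives the traversal.
func makeParentPrivate(path string) error {
	for {
		err := unix.Mount("", path, "", unix.MS_PRIVATE, "")
		if err == nil {
			return nil
		}
		if !errors.Is(err, unix.EINVAL) || path == "/" {
			return fmt.Errorf("remount-private %s: %w", path, err)
		}
		path = filepath.Dir(path) // not a mount point: try its parent
	}
}

func main() {
	// "/tmp/rootfs" is a placeholder; any absolute, clean path works.
	if err := makeParentPrivate("/tmp/rootfs"); err != nil {
		fmt.Println(err)
	}
}

Relying on EINVAL to mean "not a mount point" removes the mountinfo parse entirely, and making the remount unconditional is cheap: marking an already-private mount private is a no-op.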
vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_linux.go (generated, vendored; 113 lines changed)
@@ -164,11 +164,11 @@ func disassembleFilter(filter *libseccomp.ScmpFilter) ([]bpf.Instruction, error)
 	return program, nil
 }
 
-type nativeArch uint32
+type linuxAuditArch uint32
 
-const invalidArch nativeArch = 0
+const invalidArch linuxAuditArch = 0
 
-func archToNative(arch libseccomp.ScmpArch) (nativeArch, error) {
+func scmpArchToAuditArch(arch libseccomp.ScmpArch) (linuxAuditArch, error) {
 	switch arch {
 	case libseccomp.ArchNative:
 		// Convert to actual native architecture.
@@ -176,85 +176,89 @@ func archToNative(arch libseccomp.ScmpArch) (nativeArch, error) {
 		if err != nil {
 			return invalidArch, fmt.Errorf("unable to get native arch: %w", err)
 		}
-		return archToNative(arch)
+		return scmpArchToAuditArch(arch)
 	case libseccomp.ArchX86:
-		return nativeArch(C.C_AUDIT_ARCH_I386), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_I386), nil
 	case libseccomp.ArchAMD64, libseccomp.ArchX32:
 		// NOTE: x32 is treated like x86_64 except all x32 syscalls have the
 		// 30th bit of the syscall number set to indicate that it's not a
 		// normal x86_64 syscall.
-		return nativeArch(C.C_AUDIT_ARCH_X86_64), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_X86_64), nil
 	case libseccomp.ArchARM:
-		return nativeArch(C.C_AUDIT_ARCH_ARM), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_ARM), nil
 	case libseccomp.ArchARM64:
-		return nativeArch(C.C_AUDIT_ARCH_AARCH64), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_AARCH64), nil
 	case libseccomp.ArchMIPS:
-		return nativeArch(C.C_AUDIT_ARCH_MIPS), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_MIPS), nil
 	case libseccomp.ArchMIPS64:
-		return nativeArch(C.C_AUDIT_ARCH_MIPS64), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_MIPS64), nil
 	case libseccomp.ArchMIPS64N32:
-		return nativeArch(C.C_AUDIT_ARCH_MIPS64N32), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_MIPS64N32), nil
 	case libseccomp.ArchMIPSEL:
-		return nativeArch(C.C_AUDIT_ARCH_MIPSEL), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_MIPSEL), nil
 	case libseccomp.ArchMIPSEL64:
-		return nativeArch(C.C_AUDIT_ARCH_MIPSEL64), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_MIPSEL64), nil
 	case libseccomp.ArchMIPSEL64N32:
-		return nativeArch(C.C_AUDIT_ARCH_MIPSEL64N32), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_MIPSEL64N32), nil
 	case libseccomp.ArchPPC:
-		return nativeArch(C.C_AUDIT_ARCH_PPC), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_PPC), nil
 	case libseccomp.ArchPPC64:
-		return nativeArch(C.C_AUDIT_ARCH_PPC64), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_PPC64), nil
 	case libseccomp.ArchPPC64LE:
-		return nativeArch(C.C_AUDIT_ARCH_PPC64LE), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_PPC64LE), nil
 	case libseccomp.ArchS390:
-		return nativeArch(C.C_AUDIT_ARCH_S390), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_S390), nil
 	case libseccomp.ArchS390X:
-		return nativeArch(C.C_AUDIT_ARCH_S390X), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_S390X), nil
 	case libseccomp.ArchRISCV64:
-		return nativeArch(C.C_AUDIT_ARCH_RISCV64), nil
+		return linuxAuditArch(C.C_AUDIT_ARCH_RISCV64), nil
 	default:
 		return invalidArch, fmt.Errorf("unknown architecture: %v", arch)
 	}
 }
 
-type lastSyscallMap map[nativeArch]map[libseccomp.ScmpArch]libseccomp.ScmpSyscall
+type lastSyscallMap map[linuxAuditArch]map[libseccomp.ScmpArch]libseccomp.ScmpSyscall
 
 // Figure out largest syscall number referenced in the filter for each
 // architecture. We will be generating code based on the native architecture
 // representation, but SCMP_ARCH_X32 means we have to track cases where the
 // same architecture has different largest syscalls based on the mode.
 func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
-	lastSyscalls := make(lastSyscallMap)
-	// Only loop over architectures which are present in the filter. Any other
-	// architectures will get the libseccomp bad architecture action anyway.
+	scmpArchs := make(map[libseccomp.ScmpArch]struct{})
 	for _, ociArch := range config.Architectures {
 		arch, err := libseccomp.GetArchFromString(ociArch)
 		if err != nil {
 			return nil, fmt.Errorf("unable to validate seccomp architecture: %w", err)
 		}
+		scmpArchs[arch] = struct{}{}
+	}
+	// On architectures like ppc64le, Docker inexplicably doesn't include the
+	// native architecture in the architecture list which results in no
+	// architectures being present in the list at all (rendering the ENOSYS
+	// stub a no-op). So, always include the native architecture.
+	if nativeScmpArch, err := libseccomp.GetNativeArch(); err != nil {
+		return nil, fmt.Errorf("unable to get native arch: %w", err)
+	} else if _, ok := scmpArchs[nativeScmpArch]; !ok {
+		logrus.Debugf("seccomp: adding implied native architecture %v to config set", nativeScmpArch)
+		scmpArchs[nativeScmpArch] = struct{}{}
+	}
+	logrus.Debugf("seccomp: configured architecture set: %s", scmpArchs)
 
-		// Map native architecture to a real architecture value to avoid
-		// doubling-up the lastSyscall mapping.
-		if arch == libseccomp.ArchNative {
-			nativeArch, err := libseccomp.GetNativeArch()
-			if err != nil {
-				return nil, fmt.Errorf("unable to get native architecture: %w", err)
-			}
-			arch = nativeArch
-		}
-
-		// Figure out native architecture representation of the architecture.
-		nativeArch, err := archToNative(arch)
+	// Only loop over architectures which are present in the filter. Any other
+	// architectures will get the libseccomp bad architecture action anyway.
+	lastSyscalls := make(lastSyscallMap)
+	for arch := range scmpArchs {
+		auditArch, err := scmpArchToAuditArch(arch)
 		if err != nil {
 			return nil, fmt.Errorf("cannot map architecture %v to AUDIT_ARCH_ constant: %w", arch, err)
 		}
 
-		if _, ok := lastSyscalls[nativeArch]; !ok {
-			lastSyscalls[nativeArch] = map[libseccomp.ScmpArch]libseccomp.ScmpSyscall{}
+		if _, ok := lastSyscalls[auditArch]; !ok {
+			lastSyscalls[auditArch] = map[libseccomp.ScmpArch]libseccomp.ScmpSyscall{}
 		}
-		if _, ok := lastSyscalls[nativeArch][arch]; ok {
+		if _, ok := lastSyscalls[auditArch][arch]; ok {
 			// Because of ArchNative we may hit the same entry multiple times.
-			// Just skip it if we've seen this (nativeArch, ScmpArch)
+			// Just skip it if we've seen this (linuxAuditArch, ScmpArch)
 			// combination before.
 			continue
 		}
@@ -272,10 +276,11 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
 			}
 		}
 		if largestSyscall != 0 {
-			lastSyscalls[nativeArch][arch] = largestSyscall
+			logrus.Debugf("seccomp: largest syscall number for arch %v is %v", arch, largestSyscall)
+			lastSyscalls[auditArch][arch] = largestSyscall
 		} else {
-			logrus.Warnf("could not find any syscalls for arch %s", ociArch)
-			delete(lastSyscalls[nativeArch], arch)
+			logrus.Warnf("could not find any syscalls for arch %v", arch)
+			delete(lastSyscalls[auditArch], arch)
 		}
 	}
 	return lastSyscalls, nil
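Besides the nativeArch-to-linuxAuditArch rename, the hunks above change behavior: the architecture set is collected up front and the native architecture is always folded in, so a profile that omits it (as Docker's does on ppc64le) still yields a working ENOSYS stub. A hedged sketch of that collection step, assuming github.com/seccomp/libseccomp-golang is available (cgo plus an installed libseccomp) and using a stand-in architecture list in libseccomp's naming:

package main

import (
	"fmt"

	libseccomp "github.com/seccomp/libseccomp-golang"
)

func main() {
	// Stand-in for config.Architectures; runc stores libseccomp-style
	// arch names ("amd64", "arm64", ...) after converting the OCI
	// SCMP_ARCH_* strings.
	architectures := []string{"amd64"}

	scmpArchs := make(map[libseccomp.ScmpArch]struct{})
	for _, a := range architectures {
		arch, err := libseccomp.GetArchFromString(a)
		if err != nil {
			panic(err)
		}
		scmpArchs[arch] = struct{}{}
	}
	// The fix: always include the native architecture, so an incomplete
	// profile still produces a stub that covers the host.
	if native, err := libseccomp.GetNativeArch(); err != nil {
		panic(err)
	} else if _, ok := scmpArchs[native]; !ok {
		scmpArchs[native] = struct{}{}
	}
	fmt.Println("architecture set:", scmpArchs)
}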
@@ -293,10 +298,10 @@ func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
 // close_range(2) which were added out-of-order in the syscall table between
 // kernel releases.
 func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error) {
-	// A jump-table for each nativeArch used to generate the initial
+	// A jump-table for each linuxAuditArch used to generate the initial
 	// conditional jumps -- measured from the *END* of the program so they
 	// remain valid after prepending to the tail.
-	archJumpTable := map[nativeArch]uint32{}
+	archJumpTable := map[linuxAuditArch]uint32{}
 
 	// Generate our own -ENOSYS rules for each architecture. They have to be
 	// generated in reverse (prepended to the tail of the program) because the
@@ -309,7 +314,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
 	}
 
 	// Generate the syscall -ENOSYS rules.
-	for nativeArch, maxSyscalls := range lastSyscalls {
+	for auditArch, maxSyscalls := range lastSyscalls {
 		// The number of instructions from the tail of this section which need
 		// to be jumped in order to reach the -ENOSYS return. If the section
 		// does not jump, it will fall through to the actual filter.
@@ -390,7 +395,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
 
 		// If we're on x86 we need to add a check for x32 and if we're in
 		// the wrong mode we jump over the section.
-		if uint32(nativeArch) == uint32(C.C_AUDIT_ARCH_X86_64) {
+		if uint32(auditArch) == uint32(C.C_AUDIT_ARCH_X86_64) {
 			// Generate a prefix to check the mode.
 			switch scmpArch {
 			case libseccomp.ArchAMD64:
@@ -419,8 +424,8 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
 			section = append(section, sectionTail...)
 		case 2:
 			// x32 and x86_64 are a unique case, we can't handle any others.
-			if uint32(nativeArch) != uint32(C.C_AUDIT_ARCH_X86_64) {
-				return nil, fmt.Errorf("unknown architecture overlap on native arch %#x", nativeArch)
+			if uint32(auditArch) != uint32(C.C_AUDIT_ARCH_X86_64) {
+				return nil, fmt.Errorf("unknown architecture overlap on native arch %#x", auditArch)
 			}
 
 			x32sysno, ok := maxSyscalls[libseccomp.ArchX32]
@@ -497,7 +502,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
 		programTail = append(section, programTail...)
 
 		// Update jump table.
-		archJumpTable[nativeArch] = uint32(len(programTail))
+		archJumpTable[auditArch] = uint32(len(programTail))
 	}
 
 	// Add a dummy "jump to filter" for any architecture we might miss below.
@@ -517,9 +522,9 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
 	// architectures based on how large the jumps are going to be, or
 	// re-sort the candidate architectures each time to make sure that we
 	// pick the largest jump which is going to be smaller than 255.
-	for nativeArch := range lastSyscalls {
+	for auditArch := range lastSyscalls {
 		// We jump forwards but the jump table is calculated from the *END*.
-		jump := uint32(len(programTail)) - archJumpTable[nativeArch]
+		jump := uint32(len(programTail)) - archJumpTable[auditArch]
 
 		// Same routine as above -- this is a basic jeq check, complicated
 		// slightly if it turns out that we need to do a long jump.
@@ -528,7 +533,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
 				// jeq [arch],[jump]
 				bpf.JumpIf{
 					Cond:     bpf.JumpEqual,
-					Val:      uint32(nativeArch),
+					Val:      uint32(auditArch),
 					SkipTrue: uint8(jump),
 				},
 			}, programTail...)
@@ -537,7 +542,7 @@ func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error)
 				// jne [arch],1
 				bpf.JumpIf{
 					Cond:     bpf.JumpNotEqual,
-					Val:      uint32(nativeArch),
+					Val:      uint32(auditArch),
 					SkipTrue: 1,
 				},
 				// ja [jump]
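For reference, the jeq/jne architecture dispatch that generateEnosysStub prepends can be reproduced in miniature with golang.org/x/net/bpf, the same instruction package the vendored code uses. This is a hypothetical illustration, not the vendored filter: it admits a single audit arch and returns -ENOSYS for everything else. AUDIT_ARCH_X86_64 and the seccomp return values are standard kernel constants, and seccomp_data.arch sits at offset 4:

package main

import (
	"fmt"

	"golang.org/x/net/bpf"
)

const (
	auditArchX86_64 = 0xC000003E      // AUDIT_ARCH_X86_64
	retAllow        = 0x7fff0000      // SECCOMP_RET_ALLOW
	retErrnoENOSYS  = 0x00050000 | 38 // SECCOMP_RET_ERRNO | ENOSYS (38 on x86-64)
)

func main() {
	prog := []bpf.Instruction{
		// Load seccomp_data.arch (4 bytes at offset 4).
		bpf.LoadAbsolute{Off: 4, Size: 4},
		// jeq AUDIT_ARCH_X86_64: fall through on match, else skip one
		// instruction to the -ENOSYS return.
		bpf.JumpIf{Cond: bpf.JumpEqual, Val: auditArchX86_64, SkipFalse: 1},
		bpf.RetConstant{Val: retAllow},       // native arch: allow
		bpf.RetConstant{Val: retErrnoENOSYS}, // anything else: -ENOSYS
	}
	raw, err := bpf.Assemble(prog)
	if err != nil {
		panic(err)
	}
	fmt.Printf("assembled %d raw BPF instructions\n", len(raw))
}

The real stub builds one such section per audit arch, prepends them to the existing filter, and (as the hunks above show) keys its jump table by the AUDIT_ARCH value rather than the now-renamed nativeArch.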