From e60c023e3c2b5ee7880be2cd411c8bdbf265078c Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Thu, 10 Aug 2017 12:11:54 +0200 Subject: [PATCH] UPSTREAM: 49640: Run mount in its own systemd scope --- .../k8s.io/kubernetes/pkg/util/mount/mount.go | 9 -- .../kubernetes/pkg/util/mount/mount_linux.go | 88 +++++++++++++++++-- .../pkg/util/mount/mount_unsupported.go | 9 ++ .../pkg/util/mount/nsenter_mount.go | 54 ++++++++++-- 4 files changed, 135 insertions(+), 25 deletions(-) diff --git a/vendor/k8s.io/kubernetes/pkg/util/mount/mount.go b/vendor/k8s.io/kubernetes/pkg/util/mount/mount.go index 44058042d508..8315f7ea3779 100644 --- a/vendor/k8s.io/kubernetes/pkg/util/mount/mount.go +++ b/vendor/k8s.io/kubernetes/pkg/util/mount/mount.go @@ -94,15 +94,6 @@ func (mounter *SafeFormatAndMount) FormatAndMount(source string, target string, return mounter.formatAndMount(source, target, fstype, options) } -// New returns a mount.Interface for the current system. -// It provides options to override the default mounter behavior. -// mounterPath allows using an alternative to `/bin/mount` for mounting. -func New(mounterPath string) Interface { - return &Mounter{ - mounterPath: mounterPath, - } -} - // GetMountRefs finds all other references to the device referenced // by mountPath; returns a list of paths. func GetMountRefs(mounter Interface, mountPath string) ([]string, error) { diff --git a/vendor/k8s.io/kubernetes/pkg/util/mount/mount_linux.go b/vendor/k8s.io/kubernetes/pkg/util/mount/mount_linux.go index 1685ecb48f08..e9167facfc53 100644 --- a/vendor/k8s.io/kubernetes/pkg/util/mount/mount_linux.go +++ b/vendor/k8s.io/kubernetes/pkg/util/mount/mount_linux.go @@ -55,6 +55,17 @@ const ( // kubelet is running in the host's root mount namespace. type Mounter struct { mounterPath string + withSystemd bool +} + +// New returns a mount.Interface for the current system. +// It provides options to override the default mounter behavior. +// mounterPath allows using an alternative to `/bin/mount` for mounting. +func New(mounterPath string) Interface { + return &Mounter{ + mounterPath: mounterPath, + withSystemd: detectSystemd(), + } } // Mount mounts source to target as fstype with given options. 'source' and 'fstype' must @@ -68,18 +79,18 @@ func (mounter *Mounter) Mount(source string, target string, fstype string, optio mounterPath := "" bind, bindRemountOpts := isBind(options) if bind { - err := doMount(mounterPath, defaultMountCommand, source, target, fstype, []string{"bind"}) + err := mounter.doMount(mounterPath, defaultMountCommand, source, target, fstype, []string{"bind"}) if err != nil { return err } - return doMount(mounterPath, defaultMountCommand, source, target, fstype, bindRemountOpts) + return mounter.doMount(mounterPath, defaultMountCommand, source, target, fstype, bindRemountOpts) } // The list of filesystems that require containerized mounter on GCI image cluster fsTypesNeedMounter := sets.NewString("nfs", "glusterfs", "ceph", "cifs") if fsTypesNeedMounter.Has(fstype) { mounterPath = mounter.mounterPath } - return doMount(mounterPath, defaultMountCommand, source, target, fstype, options) + return mounter.doMount(mounterPath, defaultMountCommand, source, target, fstype, options) } // isBind detects whether a bind mount is being requested and makes the remount options to @@ -108,24 +119,80 @@ func isBind(options []string) (bool, []string) { } // doMount runs the mount command. mounterPath is the path to mounter binary if containerized mounter is used. -func doMount(mounterPath string, mountCmd string, source string, target string, fstype string, options []string) error { +func (m *Mounter) doMount(mounterPath string, mountCmd string, source string, target string, fstype string, options []string) error { mountArgs := makeMountArgs(source, target, fstype, options) if len(mounterPath) > 0 { mountArgs = append([]string{mountCmd}, mountArgs...) mountCmd = mounterPath } + if m.withSystemd { + // Try to run mount via systemd-run --scope. This will escape the + // service where kubelet runs and any fuse daemons will be started in a + // specific scope. kubelet service than can be restarted without killing + // these fuse daemons. + // + // Complete command line (when mounterPath is not used): + // systemd-run --description=... --scope -- mount -t + // + // Expected flow: + // * systemd-run creates a transient scope (=~ cgroup) and executes its + // argument (/bin/mount) there. + // * mount does its job, forks a fuse daemon if necessary and finishes. + // (systemd-run --scope finishes at this point, returning mount's exit + // code and stdout/stderr - thats one of --scope benefits). + // * systemd keeps the fuse daemon running in the scope (i.e. in its own + // cgroup) until the fuse daemon dies (another --scope benefit). + // Kubelet service can be restarted and the fuse daemon survives. + // * When the fuse daemon dies (e.g. during unmount) systemd removes the + // scope automatically. + // + // systemd-mount is not used because it's too new for older distros + // (CentOS 7, Debian Jessie). + mountCmd, mountArgs = addSystemdScope("systemd-run", target, mountCmd, mountArgs) + } else { + // No systemd-run on the host (or we failed to check it), assume kubelet + // does not run as a systemd service. + // No code here, mountCmd and mountArgs are already populated. + } + glog.V(4).Infof("Mounting cmd (%s) with arguments (%s)", mountCmd, mountArgs) command := exec.Command(mountCmd, mountArgs...) output, err := command.CombinedOutput() if err != nil { - glog.Errorf("Mount failed: %v\nMounting command: %s\nMounting arguments: %s %s %s %v\nOutput: %s\n", err, mountCmd, source, target, fstype, options, string(output)) - return fmt.Errorf("mount failed: %v\nMounting command: %s\nMounting arguments: %s %s %s %v\nOutput: %s\n", - err, mountCmd, source, target, fstype, options, string(output)) + args := strings.Join(mountArgs, " ") + glog.Errorf("Mount failed: %v\nMounting command: %s\nMounting arguments: %s\nOutput: %s\n", err, mountCmd, args, string(output)) + return fmt.Errorf("mount failed: %v\nMounting command: %s\nMounting arguments: %s\nOutput: %s\n", + err, mountCmd, args, string(output)) } return err } +// detectSystemd returns true if OS runs with systemd as init. When not sure +// (permission errors, ...), it returns false. +// There may be different ways how to detect systemd, this one makes sure that +// systemd-runs (needed by Mount()) works. +func detectSystemd() bool { + if _, err := exec.LookPath("systemd-run"); err != nil { + glog.V(2).Infof("Detected OS without systemd") + return false + } + // Try to run systemd-run --scope /bin/true, that should be enough + // to make sure that systemd is really running and not just installed, + // which happens when running in a container with a systemd-based image + // but with different pid 1. + cmd := exec.Command("systemd-run", "--description=Kubernetes systemd probe", "--scope", "true") + output, err := cmd.CombinedOutput() + if err != nil { + glog.V(2).Infof("Cannot run systemd-run, assuming non-systemd OS") + glog.V(4).Infof("systemd-run failed with: %v", err) + glog.V(4).Infof("systemd-run output: %s", string(output)) + return false + } + glog.V(2).Infof("Detected OS with systemd") + return true +} + // makeMountArgs makes the arguments to the mount(8) command. func makeMountArgs(source, target, fstype string, options []string) []string { // Build mount command as follows: @@ -145,6 +212,13 @@ func makeMountArgs(source, target, fstype string, options []string) []string { return mountArgs } +// addSystemdScope adds "system-run --scope" to given command line +func addSystemdScope(systemdRunPath, mountName, command string, args []string) (string, []string) { + descriptionArg := fmt.Sprintf("--description=Kubernetes transient mount for %s", mountName) + systemdRunArgs := []string{descriptionArg, "--scope", "--", command} + return systemdRunPath, append(systemdRunArgs, args...) +} + // Unmount unmounts the target. func (mounter *Mounter) Unmount(target string) error { glog.V(4).Infof("Unmounting %s", target) diff --git a/vendor/k8s.io/kubernetes/pkg/util/mount/mount_unsupported.go b/vendor/k8s.io/kubernetes/pkg/util/mount/mount_unsupported.go index f9abab813dc2..2119b1a0ad67 100644 --- a/vendor/k8s.io/kubernetes/pkg/util/mount/mount_unsupported.go +++ b/vendor/k8s.io/kubernetes/pkg/util/mount/mount_unsupported.go @@ -22,6 +22,15 @@ type Mounter struct { mounterPath string } +// New returns a mount.Interface for the current system. +// It provides options to override the default mounter behavior. +// mounterPath allows using an alternative to `/bin/mount` for mounting. +func New(mounterPath string) Interface { + return &Mounter{ + mounterPath: mounterPath, + } +} + func (mounter *Mounter) Mount(source string, target string, fstype string, options []string) error { return nil } diff --git a/vendor/k8s.io/kubernetes/pkg/util/mount/nsenter_mount.go b/vendor/k8s.io/kubernetes/pkg/util/mount/nsenter_mount.go index b923cbeddc47..2a274b9dc6d6 100644 --- a/vendor/k8s.io/kubernetes/pkg/util/mount/nsenter_mount.go +++ b/vendor/k8s.io/kubernetes/pkg/util/mount/nsenter_mount.go @@ -51,7 +51,7 @@ import ( // contents. TODO: remove this requirement. // 6. The host image must have mount, findmnt, and umount binaries in /bin, // /usr/sbin, or /usr/bin -// +// 7. The host image should have systemd-run in /bin, /usr/sbin, or /usr/bin // For more information about mount propagation modes, see: // https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt type NsenterMounter struct { @@ -62,9 +62,10 @@ type NsenterMounter struct { func NewNsenterMounter() *NsenterMounter { m := &NsenterMounter{ paths: map[string]string{ - "mount": "", - "findmnt": "", - "umount": "", + "mount": "", + "findmnt": "", + "umount": "", + "systemd-run": "", }, } // search for the mount command in other locations besides /usr/bin @@ -80,6 +81,7 @@ func NewNsenterMounter() *NsenterMounter { break } // TODO: error, so that the kubelet can stop if the mounts don't exist + // (don't forget that systemd-run is optional) } return m } @@ -128,15 +130,47 @@ func (n *NsenterMounter) doNsenterMount(source, target, fstype string, options [ // makeNsenterArgs makes a list of argument to nsenter in order to do the // requested mount. func (n *NsenterMounter) makeNsenterArgs(source, target, fstype string, options []string) []string { + mountCmd := n.absHostPath("mount") + mountArgs := makeMountArgs(source, target, fstype, options) + + if systemdRunPath, hasSystemd := n.paths["systemd-run"]; hasSystemd { + // Complete command line: + // nsenter --mount=/rootfs/proc/1/ns/mnt -- /bin/systemd-run --description=... --scope -- /bin/mount -t + // Expected flow is: + // * nsenter breaks out of container's mount namespace and executes + // host's systemd-run. + // * systemd-run creates a transient scope (=~ cgroup) and executes its + // argument (/bin/mount) there. + // * mount does its job, forks a fuse daemon if necessary and finishes. + // (systemd-run --scope finishes at this point, returning mount's exit + // code and stdout/stderr - thats one of --scope benefits). + // * systemd keeps the fuse daemon running in the scope (i.e. in its own + // cgroup) until the fuse daemon dies (another --scope benefit). + // Kubelet container can be restarted and the fuse daemon survives. + // * When the daemon dies (e.g. during unmount) systemd removes the + // scope automatically. + mountCmd, mountArgs = addSystemdScope(systemdRunPath, target, mountCmd, mountArgs) + } else { + // Fall back to simple mount when the host has no systemd. + // Complete command line: + // nsenter --mount=/rootfs/proc/1/ns/mnt -- /bin/mount -t + // Expected flow is: + // * nsenter breaks out of container's mount namespace and executes host's /bin/mount. + // * mount does its job, forks a fuse daemon if necessary and finishes. + // * Any fuse daemon runs in cgroup of kubelet docker container, + // restart of kubelet container will kill it! + + // No code here, mountCmd and mountArgs use /bin/mount + } + nsenterArgs := []string{ "--mount=/rootfs/proc/1/ns/mnt", "--", - n.absHostPath("mount"), + mountCmd, } + nsenterArgs = append(nsenterArgs, mountArgs...) - args := makeMountArgs(source, target, fstype, options) - - return append(nsenterArgs, args...) + return nsenterArgs } // Unmount runs umount(8) in the host's mount namespace. @@ -147,7 +181,9 @@ func (n *NsenterMounter) Unmount(target string) error { n.absHostPath("umount"), target, } - + // No need to execute systemd-run here, it's enough that unmount is executed + // in the host's mount namespace. It will finish appropriate fuse daemon(s) + // running in any scope. glog.V(5).Infof("Unmount command: %v %v", nsenterPath, args) exec := exec.New() outputBytes, err := exec.Command(nsenterPath, args...).CombinedOutput()