diff --git a/drivers/shared/executor/executor_linux.go b/drivers/shared/executor/executor_linux.go index f81090a56e4b..7a9c55b8ae7a 100644 --- a/drivers/shared/executor/executor_linux.go +++ b/drivers/shared/executor/executor_linux.go @@ -17,7 +17,6 @@ import ( "github.com/armon/circbuf" "github.com/hashicorp/consul-template/signals" hclog "github.com/hashicorp/go-hclog" - multierror "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/stats" cstructs "github.com/hashicorp/nomad/client/structs" @@ -27,7 +26,6 @@ import ( "github.com/hashicorp/nomad/plugins/drivers" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/cgroups" - cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs" lconfigs "github.com/opencontainers/runc/libcontainer/configs" ldevices "github.com/opencontainers/runc/libcontainer/devices" lutils "github.com/opencontainers/runc/libcontainer/utils" @@ -36,7 +34,7 @@ import ( ) const ( - defaultCgroupParent = "nomad" + defaultCgroupParent = "/nomad" ) var ( @@ -92,15 +90,6 @@ func (l *LibcontainerExecutor) Launch(command *ExecCommand) (*ProcessState, erro l.command = command - // Move to the root cgroup until process is started - subsystems, err := cgroups.GetAllSubsystems() - if err != nil { - return nil, err - } - if err := JoinRootCgroup(subsystems); err != nil { - return nil, err - } - // create a new factory which will store the container state in the allocDir factory, err := libcontainer.New( path.Join(command.TaskDir, "../alloc/container"), @@ -194,15 +183,6 @@ func (l *LibcontainerExecutor) Launch(command *ExecCommand) (*ProcessState, erro return nil, err } - // Join process cgroups - containerState, err := container.State() - if err != nil { - l.logger.Error("error entering user process cgroups", "executor_pid", os.Getpid(), "error", err) - } - if err := cgroups.EnterPid(containerState.CgroupPaths, os.Getpid()); err != nil { - l.logger.Error("error entering user process cgroups", "executor_pid", os.Getpid(), "error", err) - } - // start a goroutine to wait on the process to complete, so Wait calls can // be multiplexed l.userProcExited = make(chan interface{}) @@ -287,15 +267,6 @@ func (l *LibcontainerExecutor) Shutdown(signal string, grace time.Duration) erro return nil } - // move executor to root cgroup - subsystems, err := cgroups.GetAllSubsystems() - if err != nil { - return err - } - if err := JoinRootCgroup(subsystems); err != nil { - return err - } - status, err := l.container.Status() if err != nil { return err @@ -720,33 +691,40 @@ func configureBasicCgroups(cfg *lconfigs.Config) error { id := uuid.Generate() // Manually create freezer cgroup - cfg.Cgroups.Paths = map[string]string{} - root, err := cgroups.FindCgroupMountpointDir() - if err != nil { - return err - } - if _, err := os.Stat(root); err != nil { - return err - } + subsystem := "freezer" - freezer := cgroupFs.FreezerGroup{} - subsystem := freezer.Name() - path, err := cgroups.FindCgroupMountpoint("", subsystem) + path, err := getCgroupPathHelper(subsystem, filepath.Join(defaultCgroupParent, id)) if err != nil { return fmt.Errorf("failed to find %s cgroup mountpoint: %v", subsystem, err) } - // Sometimes subsystems can be mounted together as 'cpu,cpuacct'. - path = filepath.Join(root, filepath.Base(path), defaultCgroupParent, id) if err = os.MkdirAll(path, 0755); err != nil { return err } - cfg.Cgroups.Paths[subsystem] = path + cfg.Cgroups.Paths = map[string]string{ + subsystem: path, + } return nil } +func getCgroupPathHelper(subsystem, cgroup string) (string, error) { + mnt, root, err := cgroups.FindCgroupMountpointAndRoot("", subsystem) + if err != nil { + return "", err + } + + // This is needed for nested containers, because in /proc/self/cgroup we + // see paths from host, which don't exist in container. + relCgroup, err := filepath.Rel(root, cgroup) + if err != nil { + return "", err + } + + return filepath.Join(mnt, relCgroup), nil +} + func newLibcontainerConfig(command *ExecCommand) (*lconfigs.Config, error) { cfg := &lconfigs.Config{ Cgroups: &lconfigs.Cgroup{ @@ -771,28 +749,6 @@ func newLibcontainerConfig(command *ExecCommand) (*lconfigs.Config, error) { return cfg, nil } -// JoinRootCgroup moves the current process to the cgroups of the init process -func JoinRootCgroup(subsystems []string) error { - mErrs := new(multierror.Error) - paths := map[string]string{} - for _, s := range subsystems { - mnt, _, err := cgroups.FindCgroupMountpointAndRoot("", s) - if err != nil { - multierror.Append(mErrs, fmt.Errorf("error getting cgroup path for subsystem: %s", s)) - continue - } - - paths[s] = mnt - } - - err := cgroups.EnterPid(paths, os.Getpid()) - if err != nil { - multierror.Append(mErrs, err) - } - - return mErrs.ErrorOrNil() -} - // cmdDevices converts a list of driver.DeviceConfigs into excutor.Devices. func cmdDevices(devices []*drivers.DeviceConfig) ([]*lconfigs.Device, error) { if len(devices) == 0 { diff --git a/drivers/shared/executor/executor_linux_test.go b/drivers/shared/executor/executor_linux_test.go index 1861ea5ae04b..619b219e18cb 100644 --- a/drivers/shared/executor/executor_linux_test.go +++ b/drivers/shared/executor/executor_linux_test.go @@ -19,6 +19,7 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/plugins/drivers" tu "github.com/hashicorp/nomad/testutil" + "github.com/opencontainers/runc/libcontainer/cgroups" lconfigs "github.com/opencontainers/runc/libcontainer/configs" "github.com/stretchr/testify/require" "golang.org/x/sys/unix" @@ -217,6 +218,88 @@ func TestExecutor_CgroupPaths(t *testing.T) { }, func(err error) { t.Error(err) }) } +// TestExecutor_CgroupPaths asserts that all cgroups created for a task +// are destroyed on shutdown +func TestExecutor_CgroupPathsAreDestroyed(t *testing.T) { + t.Parallel() + require := require.New(t) + testutil.ExecCompatible(t) + + testExecCmd := testExecutorCommandWithChroot(t) + execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir + execCmd.Cmd = "/bin/bash" + execCmd.Args = []string{"-c", "sleep 0.2; cat /proc/self/cgroup"} + defer allocDir.Destroy() + + execCmd.ResourceLimits = true + + executor := NewExecutorWithIsolation(testlog.HCLogger(t)) + defer executor.Shutdown("SIGKILL", 0) + + ps, err := executor.Launch(execCmd) + require.NoError(err) + require.NotZero(ps.Pid) + + state, err := executor.Wait(context.Background()) + require.NoError(err) + require.Zero(state.ExitCode) + + var cgroupsPaths string + tu.WaitForResult(func() (bool, error) { + output := strings.TrimSpace(testExecCmd.stdout.String()) + // sanity check that we got some cgroups + if !strings.Contains(output, ":devices:") { + return false, fmt.Errorf("was expected cgroup files but found:\n%v", output) + } + lines := strings.Split(output, "\n") + for _, line := range lines { + // Every cgroup entry should be /nomad/$ALLOC_ID + if line == "" { + continue + } + + // Skip rdma subsystem; rdma was added in most recent kernels and libcontainer/docker + // don't isolate it by default. + if strings.Contains(line, ":rdma:") { + continue + } + + if !strings.Contains(line, ":/nomad/") { + return false, fmt.Errorf("Not a member of the alloc's cgroup: expected=...:/nomad/... -- found=%q", line) + } + } + + cgroupsPaths = output + return true, nil + }, func(err error) { t.Error(err) }) + + // shutdown executor and test that cgroups are destroyed + executor.Shutdown("SIGKILL", 0) + + // test that the cgroup paths are not visible + tmpFile, err := ioutil.TempFile("", "") + require.NoError(err) + defer os.Remove(tmpFile.Name()) + + _, err = tmpFile.WriteString(cgroupsPaths) + require.NoError(err) + tmpFile.Close() + + subsystems, err := cgroups.ParseCgroupFile(tmpFile.Name()) + require.NoError(err) + + for subsystem, cgroup := range subsystems { + if !strings.Contains(cgroup, "nomad/") { + // this should only be rdma at this point + continue + } + + p, err := getCgroupPathHelper(subsystem, cgroup) + require.NoError(err) + require.Falsef(cgroups.PathExists(p), "cgroup for %s %s still exists", subsystem, cgroup) + } +} + func TestUniversalExecutor_LookupTaskBin(t *testing.T) { t.Parallel() require := require.New(t)