diff --git a/CHANGELOG.md b/CHANGELOG.md index 497a99f0a1ae..bfb8a6d774ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ BUG FIXES: * cli: Make scoring column orders consistent `nomad alloc status` [[GH-6609](https://github.com/hashicorp/nomad/issues/6609)] * cli: Fixed a bug where a cli user may fail to query FS/Allocation API endpoints if they lack `node:read` capability [[GH-6423](https://github.com/hashicorp/nomad/issues/6423)] * client: Fixed a bug where a client may not restart dead internal processes upon client's restart on Windows [[GH-6426](https://github.com/hashicorp/nomad/issues/6426)] + * driver/exec: Fixed a bug where exec tasks can spawn processes that live beyond task lifecycle [[GH-6722](https://github.com/hashicorp/nomad/issues/6722)] * driver/docker: Added mechanism for detecting running unexpectedly running docker containers [[GH-6325](https://github.com/hashicorp/nomad/issues/6325)] * nomad: Multiple connect enabled services in the same taskgroup failed to register [[GH-6646](https://github.com/hashicorp/nomad/issues/6646)] diff --git a/drivers/exec/driver.go b/drivers/exec/driver.go index 87974ce06fff..6611ec472ce9 100644 --- a/drivers/exec/driver.go +++ b/drivers/exec/driver.go @@ -458,10 +458,8 @@ func (d *Driver) DestroyTask(taskID string, force bool) error { } if !handle.pluginClient.Exited() { - if handle.IsRunning() { - if err := handle.exec.Shutdown("", 0); err != nil { - handle.logger.Error("destroying executor failed", "err", err) - } + if err := handle.exec.Shutdown("", 0); err != nil { + handle.logger.Error("destroying executor failed", "err", err) } handle.pluginClient.Kill() diff --git a/drivers/exec/driver_test.go b/drivers/exec/driver_test.go index e0680eb49856..747fac64764b 100644 --- a/drivers/exec/driver_test.go +++ b/drivers/exec/driver_test.go @@ -7,9 +7,12 @@ import ( "io/ioutil" "os" "path/filepath" + "regexp" "runtime" + "strconv" "strings" "sync" + "syscall" "testing" "time" @@ -251,6 +254,106 @@ func TestExecDriver_StartWaitRecover(t *testing.T) { require.NoError(harness.DestroyTask(task.ID, true)) } +// TestExecDriver_DestroyKillsAll asserts that when TaskDestroy is called all +// task processes are cleaned up. +func TestExecDriver_DestroyKillsAll(t *testing.T) { + t.Parallel() + require := require.New(t) + ctestutils.ExecCompatible(t) + + d := NewExecDriver(testlog.HCLogger(t)) + harness := dtestutil.NewDriverHarness(t, d) + defer harness.Kill() + + task := &drivers.TaskConfig{ + ID: uuid.Generate(), + Name: "test", + } + + cleanup := harness.MkAllocDir(task, true) + defer cleanup() + + taskConfig := map[string]interface{}{} + taskConfig["command"] = "/bin/sh" + taskConfig["args"] = []string{"-c", fmt.Sprintf(`sleep 3600 & echo "SLEEP_PID=$!"`)} + + require.NoError(task.EncodeConcreteDriverConfig(&taskConfig)) + + handle, _, err := harness.StartTask(task) + require.NoError(err) + defer harness.DestroyTask(task.ID, true) + + ch, err := harness.WaitTask(context.Background(), handle.Config.ID) + require.NoError(err) + + select { + case result := <-ch: + require.True(result.Successful(), "command failed: %#v", result) + case <-time.After(10 * time.Second): + require.Fail("timeout waiting for task to shutdown") + } + + sleepPid := 0 + + // Ensure that the task is marked as dead, but account + // for WaitTask() closing channel before internal state is updated + testutil.WaitForResult(func() (bool, error) { + stdout, err := ioutil.ReadFile(filepath.Join(task.TaskDir().LogDir, "test.stdout.0")) + if err != nil { + return false, fmt.Errorf("failed to output pid file: %v", err) + } + + pidMatch := regexp.MustCompile(`SLEEP_PID=(\d+)`).FindStringSubmatch(string(stdout)) + if len(pidMatch) != 2 { + return false, fmt.Errorf("failed to find pid in %s", string(stdout)) + } + + pid, err := strconv.Atoi(pidMatch[1]) + if err != nil { + return false, fmt.Errorf("pid parts aren't int: %s", pidMatch[1]) + } + + sleepPid = pid + return true, nil + }, func(err error) { + require.NoError(err) + }) + + // isProcessRunning returns an error if process is not running + isProcessRunning := func(pid int) error { + process, err := os.FindProcess(pid) + if err != nil { + return fmt.Errorf("failed to find process: %s", err) + } + + err = process.Signal(syscall.Signal(0)) + if err != nil { + return fmt.Errorf("failed to signal process: %s", err) + } + + return nil + } + + require.NoError(isProcessRunning(sleepPid)) + + require.NoError(harness.DestroyTask(task.ID, true)) + + testutil.WaitForResult(func() (bool, error) { + err := isProcessRunning(sleepPid) + if err == nil { + return false, fmt.Errorf("child process is still running") + } + + if !strings.Contains(err.Error(), "failed to signal process") { + return false, fmt.Errorf("unexpected error: %v", err) + } + + return true, nil + }, func(err error) { + require.NoError(err) + }) +} + func TestExecDriver_Stats(t *testing.T) { t.Parallel() require := require.New(t) diff --git a/drivers/java/driver.go b/drivers/java/driver.go index 4d10f0e66b31..ac35aa82d72f 100644 --- a/drivers/java/driver.go +++ b/drivers/java/driver.go @@ -486,10 +486,8 @@ func (d *Driver) DestroyTask(taskID string, force bool) error { } if !handle.pluginClient.Exited() { - if handle.IsRunning() { - if err := handle.exec.Shutdown("", 0); err != nil { - handle.logger.Error("destroying executor failed", "err", err) - } + if err := handle.exec.Shutdown("", 0); err != nil { + handle.logger.Error("destroying executor failed", "err", err) } handle.pluginClient.Kill() diff --git a/drivers/qemu/driver.go b/drivers/qemu/driver.go index 8cbe7d9f8f97..156dd1104b74 100644 --- a/drivers/qemu/driver.go +++ b/drivers/qemu/driver.go @@ -534,10 +534,8 @@ func (d *Driver) DestroyTask(taskID string, force bool) error { } if !handle.pluginClient.Exited() { - if handle.IsRunning() { - if err := handle.exec.Shutdown("", 0); err != nil { - handle.logger.Error("destroying executor failed", "err", err) - } + if err := handle.exec.Shutdown("", 0); err != nil { + handle.logger.Error("destroying executor failed", "err", err) } handle.pluginClient.Kill() diff --git a/drivers/rawexec/driver.go b/drivers/rawexec/driver.go index a69932bb5ae1..9e754c5b383d 100644 --- a/drivers/rawexec/driver.go +++ b/drivers/rawexec/driver.go @@ -460,10 +460,8 @@ func (d *Driver) DestroyTask(taskID string, force bool) error { } if !handle.pluginClient.Exited() { - if handle.IsRunning() { - if err := handle.exec.Shutdown("", 0); err != nil { - handle.logger.Error("destroying executor failed", "err", err) - } + if err := handle.exec.Shutdown("", 0); err != nil { + handle.logger.Error("destroying executor failed", "err", err) } handle.pluginClient.Kill() diff --git a/drivers/rawexec/driver_unix_test.go b/drivers/rawexec/driver_unix_test.go index 8845d75d8722..77bb7c4acc73 100644 --- a/drivers/rawexec/driver_unix_test.go +++ b/drivers/rawexec/driver_unix_test.go @@ -4,7 +4,11 @@ package rawexec import ( "context" + "os" + "regexp" "runtime" + "strconv" + "syscall" "testing" "fmt" @@ -196,6 +200,111 @@ func TestRawExecDriver_StartWaitStop(t *testing.T) { require.NoError(harness.DestroyTask(task.ID, true)) } +// TestRawExecDriver_DestroyKillsAll asserts that when TaskDestroy is called all +// task processes are cleaned up. +func TestRawExecDriver_DestroyKillsAll(t *testing.T) { + t.Parallel() + + // This only works reliably with cgroup PID tracking, happens in linux only + if runtime.GOOS != "linux" { + t.Skip("Linux only test") + } + + require := require.New(t) + + d := newEnabledRawExecDriver(t) + harness := dtestutil.NewDriverHarness(t, d) + defer harness.Kill() + + task := &drivers.TaskConfig{ + ID: uuid.Generate(), + Name: "test", + } + + cleanup := harness.MkAllocDir(task, true) + defer cleanup() + + taskConfig := map[string]interface{}{} + taskConfig["command"] = "/bin/sh" + taskConfig["args"] = []string{"-c", fmt.Sprintf(`sleep 3600 & echo "SLEEP_PID=$!"`)} + + require.NoError(task.EncodeConcreteDriverConfig(&taskConfig)) + + handle, _, err := harness.StartTask(task) + require.NoError(err) + defer harness.DestroyTask(task.ID, true) + + ch, err := harness.WaitTask(context.Background(), handle.Config.ID) + require.NoError(err) + + select { + case result := <-ch: + require.True(result.Successful(), "command failed: %#v", result) + case <-time.After(10 * time.Second): + require.Fail("timeout waiting for task to shutdown") + } + + sleepPid := 0 + + // Ensure that the task is marked as dead, but account + // for WaitTask() closing channel before internal state is updated + testutil.WaitForResult(func() (bool, error) { + stdout, err := ioutil.ReadFile(filepath.Join(task.TaskDir().LogDir, "test.stdout.0")) + if err != nil { + return false, fmt.Errorf("failed to output pid file: %v", err) + } + + pidMatch := regexp.MustCompile(`SLEEP_PID=(\d+)`).FindStringSubmatch(string(stdout)) + if len(pidMatch) != 2 { + return false, fmt.Errorf("failed to find pid in %s", string(stdout)) + } + + pid, err := strconv.Atoi(pidMatch[1]) + if err != nil { + return false, fmt.Errorf("pid parts aren't int: %s", pidMatch[1]) + } + + sleepPid = pid + return true, nil + }, func(err error) { + require.NoError(err) + }) + + // isProcessRunning returns an error if process is not running + isProcessRunning := func(pid int) error { + process, err := os.FindProcess(pid) + if err != nil { + return fmt.Errorf("failed to find process: %s", err) + } + + err = process.Signal(syscall.Signal(0)) + if err != nil { + return fmt.Errorf("failed to signal process: %s", err) + } + + return nil + } + + require.NoError(isProcessRunning(sleepPid)) + + require.NoError(harness.DestroyTask(task.ID, true)) + + testutil.WaitForResult(func() (bool, error) { + err := isProcessRunning(sleepPid) + if err == nil { + return false, fmt.Errorf("child process is still running") + } + + if !strings.Contains(err.Error(), "failed to signal process") { + return false, fmt.Errorf("unexpected error: %v", err) + } + + return true, nil + }, func(err error) { + require.NoError(err) + }) +} + func TestRawExec_ExecTaskStreaming(t *testing.T) { t.Parallel() if runtime.GOOS == "darwin" { diff --git a/drivers/rkt/driver.go b/drivers/rkt/driver.go index b4145523881b..238dcfc77394 100644 --- a/drivers/rkt/driver.go +++ b/drivers/rkt/driver.go @@ -810,10 +810,8 @@ func (d *Driver) DestroyTask(taskID string, force bool) error { } if !handle.pluginClient.Exited() { - if handle.IsRunning() { - if err := handle.exec.Shutdown("", 0); err != nil { - handle.logger.Error("destroying executor failed", "err", err) - } + if err := handle.exec.Shutdown("", 0); err != nil { + handle.logger.Error("destroying executor failed", "err", err) } handle.pluginClient.Kill()