From 380262613d3a6c4722fbfffb54ade8d94b746029 Mon Sep 17 00:00:00 2001
From: Mahmood Ali <mahmood@hashicorp.com>
Date: Sat, 29 Jun 2019 03:56:40 -0500
Subject: [PATCH 1/2] Fail alloc if alloc runner prestart hooks fail

When an alloc runner prestart hook fails, the task runners aren't invoked
and they remain in a pending state.

This leads to terrible results, some of which are:
* Lockup in GC process as reported in https://github.com/hashicorp/nomad/pull/5861
* Lockup in shutdown process as TR.Shutdown() waits for WaitCh to be closed
* Alloc not being restarted/rescheduled to another node (as it's still in
  pending state)
* Unexpected restart of alloc on a client restart, potentially days/weeks after
  alloc expected start time!

Here, we treat all tasks to have failed if alloc runner prestart hook fails.
This fixes the lockups, and permits the alloc to be rescheduled on another node.

While it's desirable to retry alloc runner in such failures, I opted to treat it
out of scope.  I'm afraid of some subtles about alloc and task runners and their
idempotency that's better handled in a follow up PR.

This might be one of the root causes for
https://github.com/hashicorp/nomad/issues/5840 .
---
 client/allocrunner/alloc_runner.go           |  8 +++
 client/allocrunner/alloc_runner_unix_test.go | 76 ++++++++++++++++++++
 client/allocrunner/taskrunner/task_runner.go | 22 ++++++
 3 files changed, 106 insertions(+)

diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go
index 544f9ba6ffae..170dabe24424 100644
--- a/client/allocrunner/alloc_runner.go
+++ b/client/allocrunner/alloc_runner.go
@@ -248,10 +248,18 @@ func (ar *allocRunner) Run() {
 	default:
 	}
 
+	// When handling (potentially restored) terminal alloc, ensure tasks and post-run hooks are run
+	// to perform any cleanup that's necessary, potentially not done prior to earlier termination
+
 	// Run the prestart hooks if non-terminal
 	if ar.shouldRun() {
 		if err := ar.prerun(); err != nil {
 			ar.logger.Error("prerun failed", "error", err)
+
+			for _, tr := range ar.tasks {
+				tr.MarkFailedDead(fmt.Sprintf("failed to setup runner: %v", err))
+			}
+
 			goto POST
 		}
 	}
diff --git a/client/allocrunner/alloc_runner_unix_test.go b/client/allocrunner/alloc_runner_unix_test.go
index 8c31832e3c43..a438266a4956 100644
--- a/client/allocrunner/alloc_runner_unix_test.go
+++ b/client/allocrunner/alloc_runner_unix_test.go
@@ -215,3 +215,79 @@ func TestAllocRunner_Restore_CompletedBatch(t *testing.T) {
 	events := ar2.AllocState().TaskStates[task.Name].Events
 	require.Equal(t, initialRunEvents, events)
 }
+
+// TestAllocRunner_PreStartFailuresLeadToFailed asserts that if an alloc
+// prestart hooks failed, then the alloc and subsequent tasks transition
+// to failed state
+func TestAllocRunner_PreStartFailuresLeadToFailed(t *testing.T) {
+	t.Parallel()
+
+	alloc := mock.Alloc()
+	alloc.Job.Type = structs.JobTypeBatch
+	task := alloc.Job.TaskGroups[0].Tasks[0]
+	task.Driver = "mock_driver"
+	task.Config = map[string]interface{}{
+		"run_for": "2ms",
+	}
+	alloc.Job.TaskGroups[0].RestartPolicy = &structs.RestartPolicy{
+		Attempts: 0,
+	}
+
+	conf, cleanup := testAllocRunnerConfig(t, alloc.Copy())
+	defer cleanup()
+
+	// Maintain state for subsequent run
+	conf.StateDB = state.NewMemDB(conf.Logger)
+
+	// Start and wait for task to be running
+	ar, err := NewAllocRunner(conf)
+	require.NoError(t, err)
+
+	ar.runnerHooks = append(ar.runnerHooks, &allocFailingPrestartHook{})
+
+	go ar.Run()
+	defer destroy(ar)
+
+	select {
+	case <-ar.WaitCh():
+	case <-time.After(10 * time.Second):
+		require.Fail(t, "alloc.waitCh wasn't closed")
+	}
+
+	testutil.WaitForResult(func() (bool, error) {
+		s := ar.AllocState()
+		if s.ClientStatus != structs.AllocClientStatusFailed {
+			return false, fmt.Errorf("expected complete, got %s", s.ClientStatus)
+		}
+		return true, nil
+	}, func(err error) {
+		require.NoError(t, err)
+	})
+
+	// once job finishes, it shouldn't run again
+	require.False(t, ar.shouldRun())
+	initialRunEvents := ar.AllocState().TaskStates[task.Name].Events
+	require.Len(t, initialRunEvents, 2)
+
+	ls, ts, err := conf.StateDB.GetTaskRunnerState(alloc.ID, task.Name)
+	require.NoError(t, err)
+	require.NotNil(t, ls)
+	require.NotNil(t, ts)
+	require.Equal(t, structs.TaskStateDead, ts.State)
+	require.True(t, ts.Failed)
+
+	// TR waitCh must be closed too!
+	select {
+	case <-ar.tasks[task.Name].WaitCh():
+	case <-time.After(10 * time.Second):
+		require.Fail(t, "tr.waitCh wasn't closed")
+	}
+}
+
+type allocFailingPrestartHook struct{}
+
+func (*allocFailingPrestartHook) Name() string { return "failing_prestart" }
+
+func (*allocFailingPrestartHook) Prerun() error {
+	return fmt.Errorf("failing prestart hooks")
+}
diff --git a/client/allocrunner/taskrunner/task_runner.go b/client/allocrunner/taskrunner/task_runner.go
index 233a97b116c6..cf99d8c3786f 100644
--- a/client/allocrunner/taskrunner/task_runner.go
+++ b/client/allocrunner/taskrunner/task_runner.go
@@ -388,6 +388,28 @@ func (tr *TaskRunner) initLabels() {
 	}
 }
 
+// Mark a task as failed and not to run.  Aimed to be invoked when alloc runner
+// prestart hooks failed.
+// Should never be called with Run().
+func (tr *TaskRunner) MarkFailedDead(reason string) {
+	defer close(tr.waitCh)
+
+	tr.stateLock.Lock()
+	if err := tr.stateDB.PutTaskRunnerLocalState(tr.allocID, tr.taskName, tr.localState); err != nil {
+		//TODO Nomad will be unable to restore this task; try to kill
+		//     it now and fail? In general we prefer to leave running
+		//     tasks running even if the agent encounters an error.
+		tr.logger.Warn("error persisting local failed task state; may be unable to restore after a Nomad restart",
+			"error", err)
+	}
+	tr.stateLock.Unlock()
+
+	event := structs.NewTaskEvent(structs.TaskSetupFailure).
+		SetDisplayMessage(reason).
+		SetFailsTask()
+	tr.UpdateState(structs.TaskStateDead, event)
+}
+
 // Run the TaskRunner. Starts the user's task or reattaches to a restored task.
 // Run closes WaitCh when it exits. Should be started in a goroutine.
 func (tr *TaskRunner) Run() {

From 99802390c19d950f2ba194dda0d6bf2d9158c717 Mon Sep 17 00:00:00 2001
From: Mahmood Ali <mahmood@hashicorp.com>
Date: Tue, 2 Jul 2019 18:37:52 +0800
Subject: [PATCH 2/2] run post-run/post-stop task runner hooks

Handle when prestart failed while restoring a task, to prevent
accidentally leaking consul/logmon processes.
---
 client/allocrunner/taskrunner/task_runner.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/client/allocrunner/taskrunner/task_runner.go b/client/allocrunner/taskrunner/task_runner.go
index cf99d8c3786f..ac9d7a1c2607 100644
--- a/client/allocrunner/taskrunner/task_runner.go
+++ b/client/allocrunner/taskrunner/task_runner.go
@@ -408,6 +408,11 @@ func (tr *TaskRunner) MarkFailedDead(reason string) {
 		SetDisplayMessage(reason).
 		SetFailsTask()
 	tr.UpdateState(structs.TaskStateDead, event)
+
+	// Run the stop hooks in case task was a restored task that failed prestart
+	if err := tr.stop(); err != nil {
+		tr.logger.Error("stop failed while marking task dead", "error", err)
+	}
 }
 
 // Run the TaskRunner. Starts the user's task or reattaches to a restored task.