diff --git a/.changelog/15477.txt b/.changelog/15477.txt new file mode 100644 index 000000000000..4696f4813d7a --- /dev/null +++ b/.changelog/15477.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed a bug where allocation cleanup hooks would not run +``` diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 1d50c2595bb4..8a58fa80180b 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -125,7 +125,7 @@ type allocRunner struct { allocDir *allocdir.AllocDir // runnerHooks are alloc runner lifecycle hooks that should be run on state - // transistions. + // transitions. runnerHooks []interfaces.RunnerHook // hookState is the output of allocrunner hooks @@ -546,7 +546,9 @@ func (ar *allocRunner) handleTaskStateUpdates() { } } + // kill remaining live tasks if len(liveRunners) > 0 { + // if all live runners are sidecars - kill alloc onlySidecarsRemaining := hasSidecars && !hasNonSidecarTasks(liveRunners) if killEvent == nil && onlySidecarsRemaining { @@ -586,6 +588,14 @@ func (ar *allocRunner) handleTaskStateUpdates() { } } } else { + // there are no live runners left + + // run AR pre-kill hooks if this alloc is done, but not if it's because + // the agent is shutting down. + if !ar.isShuttingDown() && done { + ar.preKillHooks() + } + // If there are no live runners left kill all non-poststop task // runners to unblock them from the alloc restart loop. for _, tr := range ar.tasks { diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index c2d69ca0ebb4..8b39851fb75b 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -329,6 +329,7 @@ func (ar *allocRunner) destroy() error { func (ar *allocRunner) preKillHooks() { for _, hook := range ar.runnerHooks { pre, ok := hook.(interfaces.RunnerPreKillHook) + if !ok { continue } diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 90b94657b3d0..48084099a704 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -6,6 +6,7 @@ import ( "io/ioutil" "os" "path/filepath" + "sync/atomic" "testing" "time" @@ -23,6 +24,8 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test/must" + "github.com/shoenig/test/wait" "github.com/stretchr/testify/require" ) @@ -2398,3 +2401,43 @@ func TestHasSidecarTasks(t *testing.T) { }) } } + +type allocPreKillHook struct { + ran atomic.Bool +} + +func (*allocPreKillHook) Name() string { return "test_prekill" } + +func (h *allocPreKillHook) PreKill() { + h.ran.Store(true) +} + +func TestAllocRunner_PreKill_RunOnDone(t *testing.T) { + ci.Parallel(t) + + alloc := mock.Alloc() + task := alloc.Job.TaskGroups[0].Tasks[0] + task.Driver = "mock_driver" + task.Config = map[string]interface{}{"run_for": "2ms"} + alloc.DesiredStatus = "stop" + + conf, cleanup := testAllocRunnerConfig(t, alloc.Copy()) + t.Cleanup(cleanup) + + ar, err := NewAllocRunner(conf) + must.NoError(t, err) + + // set our custom prekill hook + hook := new(allocPreKillHook) + ar.runnerHooks = append(ar.runnerHooks, hook) + + go ar.Run() + defer destroy(ar) + + // wait for completion or timeout + must.Wait(t, wait.InitialSuccess( + wait.BoolFunc(hook.ran.Load), + wait.Timeout(5*time.Second), + wait.Gap(500*time.Millisecond), + )) +} diff --git a/client/allocrunner/alloc_runner_unix_test.go b/client/allocrunner/alloc_runner_unix_test.go index 0859569101e7..0c2492da2e58 100644 --- a/client/allocrunner/alloc_runner_unix_test.go +++ b/client/allocrunner/alloc_runner_unix_test.go @@ -1,5 +1,4 @@ //go:build !windows -// +build !windows package allocrunner