From 9c7de777833959b346991bab33af847ddb42c30c Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 23 Dec 2021 11:50:51 -0500 Subject: [PATCH] task runner: fix goroutine leak in prestart hook (#11741) The task runner prestart hooks take a `joincontext` so they have the option to exit early if either of two contexts is canceled: from killing the task or client shutdown. Some tasks exit without being shut down by the server, so neither of the joined contexts ever gets canceled and we leak the `joincontext` (48 bytes) and its internal goroutine. This primarily impacts batch jobs and any task that fails or completes early such as non-sidecar prestart lifecycle tasks. Cancel the `joincontext` after the prestart call exits to fix the leak. --- .changelog/11741.txt | 3 +++ client/allocrunner/interfaces/task_lifecycle.go | 4 +++- client/allocrunner/taskrunner/task_runner_hooks.go | 8 +++++--- 3 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 .changelog/11741.txt diff --git a/.changelog/11741.txt b/.changelog/11741.txt new file mode 100644 index 000000000000..1302fc2e3e39 --- /dev/null +++ b/.changelog/11741.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed a memory and goroutine leak for batch tasks and any task that exits without being shut down from the server +``` diff --git a/client/allocrunner/interfaces/task_lifecycle.go b/client/allocrunner/interfaces/task_lifecycle.go index ee99a507bd50..e02b1366faaf 100644 --- a/client/allocrunner/interfaces/task_lifecycle.go +++ b/client/allocrunner/interfaces/task_lifecycle.go @@ -89,7 +89,9 @@ type TaskPrestartHook interface { // Prestart is called before the task is started including after every // restart. Prestart is not called if the allocation is terminal. // - // The context is cancelled if the task is killed or shutdown. + // The context is cancelled if the task is killed or shutdown but + // should not be stored in any persistent goroutines this Prestart + // creates. 
Prestart(context.Context, *TaskPrestartRequest, *TaskPrestartResponse) error } diff --git a/client/allocrunner/taskrunner/task_runner_hooks.go b/client/allocrunner/taskrunner/task_runner_hooks.go index 02c2525c4e9f..cf41e345be07 100644 --- a/client/allocrunner/taskrunner/task_runner_hooks.go +++ b/client/allocrunner/taskrunner/task_runner_hooks.go @@ -179,6 +179,11 @@ func (tr *TaskRunner) prestart() error { }() } + // use a join context to allow any blocking pre-start hooks + // to be canceled by either killCtx or shutdownCtx + joinedCtx, joinedCancel := joincontext.Join(tr.killCtx, tr.shutdownCtx) + defer joinedCancel() + for _, hook := range tr.runnerHooks { pre, ok := hook.(interfaces.TaskPrestartHook) if !ok { @@ -224,9 +229,6 @@ func (tr *TaskRunner) prestart() error { } // Run the prestart hook - // use a joint context to allow any blocking pre-start hooks - // to be canceled by either killCtx or shutdownCtx - joinedCtx, _ := joincontext.Join(tr.killCtx, tr.shutdownCtx) var resp interfaces.TaskPrestartResponse if err := pre.Prestart(joinedCtx, &req, &resp); err != nil { tr.emitHookError(err, name)