From 00c8cd37bb7cf41f3d66d7f21399a2437d84109e Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Thu, 10 Nov 2022 08:30:27 -0600 Subject: [PATCH] template: protect use of template manager with a lock (#15192) This PR protects access to `templateHook.templateManager` with its lock. So far we have not been able to reproduce the panic - but it seems either Poststart is running without a Prestart being run first (should be impossible), or the Update hook is running concurrently with Poststart, nil-ing out the templateManager in a race with Poststart. Fixes #15189 --- .changelog/15192.txt | 3 +++ .../allocrunner/taskrunner/template_hook.go | 25 ++++++++++++------- 2 files changed, 19 insertions(+), 9 deletions(-) create mode 100644 .changelog/15192.txt diff --git a/.changelog/15192.txt b/.changelog/15192.txt new file mode 100644 index 000000000000..abe797edd61c --- /dev/null +++ b/.changelog/15192.txt @@ -0,0 +1,3 @@ +```release-note:bug +template: Fixed a bug where template could cause agent panic on startup +``` diff --git a/client/allocrunner/taskrunner/template_hook.go b/client/allocrunner/taskrunner/template_hook.go index 4e14fcc43f2a..275937988e27 100644 --- a/client/allocrunner/taskrunner/template_hook.go +++ b/client/allocrunner/taskrunner/template_hook.go @@ -116,11 +116,18 @@ func (h *templateHook) Prestart(ctx context.Context, req *interfaces.TaskPrestar } func (h *templateHook) Poststart(ctx context.Context, req *interfaces.TaskPoststartRequest, resp *interfaces.TaskPoststartResponse) error { + h.managerLock.Lock() + defer h.managerLock.Unlock() + + if h.templateManager == nil { + return nil + } + if req.DriverExec != nil { h.templateManager.SetDriverHandle(req.DriverExec) } else { - for _, template := range h.config.templates { - if template.ChangeMode == structs.TemplateChangeModeScript { + for _, tmpl := range h.config.templates { + if tmpl.ChangeMode == structs.TemplateChangeModeScript { return fmt.Errorf("template has change mode set to 'script' but the driver it uses does not provide exec capability") } } @@ -166,17 +173,17 @@ func (h *templateHook) Stop(ctx context.Context, req *interfaces.TaskStopRequest return nil } -// Handle new Vault token +// Update is used to handle updates to vault and/or nomad tokens. func (h *templateHook) Update(ctx context.Context, req *interfaces.TaskUpdateRequest, resp *interfaces.TaskUpdateResponse) error { h.managerLock.Lock() defer h.managerLock.Unlock() - // Nothing to do + // no template manager to manage if h.templateManager == nil { return nil } - // Check if either the Nomad or Vault tokens have changed + // neither vault or nomad token has been updated, nothing to do if req.VaultToken == h.vaultToken && req.NomadToken == h.nomadToken { return nil } else { @@ -184,15 +191,15 @@ func (h *templateHook) Update(ctx context.Context, req *interfaces.TaskUpdateReq h.nomadToken = req.NomadToken } - // Shutdown the old template + // shutdown the old template h.templateManager.Stop() h.templateManager = nil - // Create the new template + // create the new template if _, err := h.newManager(); err != nil { - err := fmt.Errorf("failed to build template manager: %v", err) + err = fmt.Errorf("failed to build template manager: %v", err) h.logger.Error("failed to build template manager", "error", err) - h.config.lifecycle.Kill(context.Background(), + _ = h.config.lifecycle.Kill(context.Background(), structs.NewTaskEvent(structs.TaskKilling). SetFailsTask(). SetDisplayMessage(fmt.Sprintf("Template update %v", err)))