From ae2ac8ab58b2d00237b690452dbc4e21ede2b3a6 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 19 Jul 2017 17:31:30 -0700 Subject: [PATCH 1/3] Should not persist state after alloc_runner is garbage collected --- client/alloc_runner.go | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/client/alloc_runner.go b/client/alloc_runner.go index bbb8e0128402..19657342d68f 100644 --- a/client/alloc_runner.go +++ b/client/alloc_runner.go @@ -88,7 +88,8 @@ type AllocRunner struct { // State related fields // stateDB is used to store the alloc runners state - stateDB *bolt.DB + stateDB *bolt.DB + allocStateLock sync.Mutex // persistedEval is the last persisted evaluation ID. Since evaluation // IDs change on every allocation update we only need to persist the @@ -352,6 +353,13 @@ func (r *AllocRunner) SaveState() error { } func (r *AllocRunner) saveAllocRunnerState() error { + r.allocStateLock.Lock() + defer r.allocStateLock.Unlock() + + if r.ctx.Err() == context.Canceled { + return nil + } + // Grab all the relevant data alloc := r.Alloc() @@ -445,6 +453,9 @@ func (r *AllocRunner) saveTaskRunnerState(tr *TaskRunner) error { // DestroyState is used to cleanup after ourselves func (r *AllocRunner) DestroyState() error { + r.allocStateLock.Lock() + defer r.allocStateLock.Unlock() + return r.stateDB.Update(func(tx *bolt.Tx) error { if err := deleteAllocationBucket(tx, r.allocID); err != nil { return fmt.Errorf("failed to delete allocation bucket: %v", err) @@ -994,6 +1005,11 @@ func (r *AllocRunner) shouldUpdate(serverIndex uint64) bool { // Destroy is used to indicate that the allocation context should be destroyed func (r *AllocRunner) Destroy() { + // Lock when closing the context as that gives the save state code + // serialization. + r.allocStateLock.Lock() + defer r.allocStateLock.Unlock() + r.exitFn() r.allocBroadcast.Close() } From 738321efa34c019876213ff3d996da8aa6f862b1 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Thu, 20 Jul 2017 10:17:41 -0700 Subject: [PATCH 2/3] Don't save task runner state if it is destroyed --- client/task_runner.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/client/task_runner.go b/client/task_runner.go index db494c94c90a..f248bf8ad839 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -416,6 +416,13 @@ func pre06ScriptCheck(ver, driver string, services []*structs.Service) bool { // SaveState is used to snapshot our state func (r *TaskRunner) SaveState() error { + r.destroyLock.Lock() + defer r.destroyLock.Unlock() + if r.destroy { + // Don't save state if already destroyed + return nil + } + r.persistLock.Lock() defer r.persistLock.Unlock() snap := taskRunnerState{ From bb958ba7456d5813228826dc68097d39ed8c1392 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 20 Jul 2017 12:02:04 -0700 Subject: [PATCH 3/3] Destroy tasks that are part of terminal alloc --- client/alloc_runner.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/client/alloc_runner.go b/client/alloc_runner.go index 19657342d68f..4d58e1a1a9af 100644 --- a/client/alloc_runner.go +++ b/client/alloc_runner.go @@ -283,6 +283,7 @@ func (r *AllocRunner) RestoreState() error { } // Restore the task runners + taskDestroyEvent := structs.NewTaskEvent(structs.TaskKilled) var mErr multierror.Error for _, task := range tg.Tasks { name := task.Name @@ -300,14 +301,14 @@ func (r *AllocRunner) RestoreState() error { td = r.allocDir.NewTaskDir(name) } - tr := NewTaskRunner(r.logger, r.config, r.stateDB, r.setTaskState, td, r.Alloc(), task, r.vaultClient, r.consulClient) - r.tasks[name] = tr - // Skip tasks in terminal states. if state.State == structs.TaskStateDead { continue } + tr := NewTaskRunner(r.logger, r.config, r.stateDB, r.setTaskState, td, r.Alloc(), task, r.vaultClient, r.consulClient) + r.tasks[name] = tr + if restartReason, err := tr.RestoreState(); err != nil { r.logger.Printf("[ERR] client: failed to restore state for alloc %s task %q: %v", r.allocID, name, err) mErr.Errors = append(mErr.Errors, err) @@ -326,6 +327,8 @@ func (r *AllocRunner) RestoreState() error { r.logger.Printf("[INFO] client: restarting alloc %s task %s: %v", r.allocID, name, restartReason) tr.Restart("upgrade", restartReason) } + } else { + tr.Destroy(taskDestroyEvent) } }