diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 1f93f2070b25..9a8c0824b2cf 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -763,14 +763,15 @@ func (ar *allocRunner) destroyImpl() { // state if Run() ran at all. <-ar.taskStateUpdateHandlerCh - // Cleanup state db + // Mark alloc as destroyed + ar.destroyedLock.Lock() + + // Cleanup state db; while holding the lock to avoid + // a race periodic PersistState that may resurrect the alloc if err := ar.stateDB.DeleteAllocationBucket(ar.id); err != nil { ar.logger.Warn("failed to delete allocation state", "error", err) } - // Mark alloc as destroyed - ar.destroyedLock.Lock() - if !ar.shutdown { ar.shutdown = true close(ar.shutdownCh) @@ -782,6 +783,24 @@ func (ar *allocRunner) destroyImpl() { ar.destroyedLock.Unlock() } +func (ar *allocRunner) PersistState() error { + ar.destroyedLock.Lock() + defer ar.destroyedLock.Unlock() + + if ar.destroyed { + err := ar.stateDB.DeleteAllocationBucket(ar.id) + if err != nil { + ar.logger.Warn("failed to delete allocation bucket", "error", err) + } + return nil + } + + // TODO: consider persisting deployment state along with task status. + // While we study why only the alloc is persisted, I opted to maintain current + // behavior and not risk adding yet more IO calls unnecessarily. + return ar.stateDB.PutAllocation(ar.Alloc()) +} + // Destroy the alloc runner by stopping it if it is still running and cleaning // up all of its resources. // diff --git a/client/client.go b/client/client.go index 71bf7817834e..a91092dc915a 100644 --- a/client/client.go +++ b/client/client.go @@ -131,6 +131,7 @@ type AllocRunner interface { ShutdownCh() <-chan struct{} Signal(taskName, signal string) error GetTaskEventHandler(taskName string) drivermanager.EventHandler + PersistState() error RestartTask(taskName string, taskEvent *structs.TaskEvent) error RestartAll(taskEvent *structs.TaskEvent) error @@ -1121,7 +1122,7 @@ func (c *Client) saveState() error { for id, ar := range runners { go func(id string, ar AllocRunner) { - err := c.stateDB.PutAllocation(ar.Alloc()) + err := ar.PersistState() if err != nil { c.logger.Error("error saving alloc state", "error", err, "alloc_id", id) l.Lock()