From 4826d8464efcf94acf82e404beb4cda2162fe4de Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 28 Feb 2017 10:29:12 -0800 Subject: [PATCH] Fix two issues during client restore state This PR fixes two issues: 1) A close of a nil stopCollection channel when restoring and prestart fails. The failure will cause the killCh to be triggered which will close collection before it has been initialized. 2) Fixes a deadlock in which the handleWaitCh is never triggered since it is not initialized when there is an error in prestart and the killCh is triggered. Both fixes are by maintaining the loop invariant that the two channels are valid after there is a handle. --- client/task_runner.go | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/client/task_runner.go b/client/task_runner.go index 7f35234132a0..a18769503162 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -874,6 +874,18 @@ func (r *TaskRunner) run() { var stopCollection chan struct{} var handleWaitCh chan *dstructs.WaitResult + // If we already have a handle, populate the stopCollection and handleWaitCh + // to fix the invariant that it exists. + r.handleLock.Lock() + handleEmpty := r.handle == nil + r.handleLock.Unlock() + + if !handleEmpty { + stopCollection = make(chan struct{}) + go r.collectResourceUsageStats(stopCollection) + handleWaitCh = r.handle.WaitCh() + } + for { // Do the prestart activities prestartResultCh := make(chan bool, 1) @@ -895,7 +907,6 @@ func (r *TaskRunner) run() { r.handleLock.Lock() handleEmpty := r.handle == nil r.handleLock.Unlock() - if handleEmpty { startErr := r.startTask() r.restartTracker.SetStartError(startErr) @@ -909,14 +920,14 @@ func (r *TaskRunner) run() { r.runningLock.Lock() r.running = true r.runningLock.Unlock() - } - if stopCollection == nil { - stopCollection = make(chan struct{}) - go r.collectResourceUsageStats(stopCollection) - } + if stopCollection == nil { + stopCollection = make(chan struct{}) + go r.collectResourceUsageStats(stopCollection) + } - handleWaitCh = r.handle.WaitCh() + handleWaitCh = r.handle.WaitCh() + } case waitRes := <-handleWaitCh: if waitRes == nil {