hashicorp · schmichael · Jan 6, 2017 · Dec 3, 2016 · Jan 3, 2017 · Jan 4, 2017
diff --git a/client/alloc_runner.go b/client/alloc_runner.go
@@ -11,7 +11,6 @@ import (
 	"github.com/hashicorp/go-multierror"
 	"github.com/hashicorp/nomad/client/allocdir"
 	"github.com/hashicorp/nomad/client/config"
-	"github.com/hashicorp/nomad/client/driver"
 	"github.com/hashicorp/nomad/client/vaultclient"
 	"github.com/hashicorp/nomad/nomad/structs"
 
@@ -48,8 +47,9 @@ type AllocRunner struct {
 
 	dirtyCh chan struct{}
 
-	ctx        *driver.ExecContext
-	ctxLock    sync.Mutex
+	allocDir     *allocdir.AllocDir
+	allocDirLock sync.Mutex
+
 	tasks      map[string]*TaskRunner
 	taskStates map[string]*structs.TaskState
 	restored   map[string]struct{}
@@ -76,9 +76,22 @@ type AllocRunner struct {
 type allocRunnerState struct {
 	Version                string
 	Alloc                  *structs.Allocation
+	AllocDir               *allocdir.AllocDir
 	AllocClientStatus      string
 	AllocClientDescription string
-	Context                *driver.ExecContext
+
+	// COMPAT: Remove in 0.7.0: removing will break upgrading directly from
+	//         0.5.2, so don't remove in the 0.6 series.
+	// Context is deprecated and only used to migrate from older releases.
+	// It will be removed in the future.
+	Context *struct {
+		AllocID  string // unused; included for completeness
+		AllocDir struct {
+			AllocDir  string
+			SharedDir string // unused; included for completeness
+			TaskDirs  map[string]string
+		}
+	} `json:"Context,omitempty"`
 }
 
 // NewAllocRunner is used to create a new allocation context
@@ -117,18 +130,28 @@ func (r *AllocRunner) RestoreState() error {
 		return err
 	}
 
+	// #2132 Upgrade path: if snap.AllocDir is nil, try to convert old
+	// Context struct to new AllocDir struct
+	if snap.AllocDir == nil && snap.Context != nil {
+		r.logger.Printf("[DEBUG] client: migrating state snapshot for alloc %q", r.alloc.ID)
+		snap.AllocDir = allocdir.NewAllocDir(r.logger, snap.Context.AllocDir.AllocDir)
+		for taskName := range snap.Context.AllocDir.TaskDirs {
+			snap.AllocDir.NewTaskDir(taskName)
+		}
+	}
+
 	// Restore fields
 	r.alloc = snap.Alloc
-	r.ctx = snap.Context
+	r.allocDir = snap.AllocDir
 	r.allocClientStatus = snap.AllocClientStatus
 	r.allocClientDescription = snap.AllocClientDescription
 
 	var snapshotErrors multierror.Error
 	if r.alloc == nil {
 		snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil allocation"))
 	}
-	if r.ctx == nil {
-		snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil context"))
+	if r.allocDir == nil {
+		snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil alloc dir"))
 	}
 	if e := snapshotErrors.ErrorOrNil(); e != nil {
 		return e
@@ -142,9 +165,16 @@ func (r *AllocRunner) RestoreState() error {
 		// Mark the task as restored.
 		r.restored[name] = struct{}{}
 
+		td, ok := r.allocDir.TaskDirs[name]
+		if !ok {
+			err := fmt.Errorf("failed to find task dir metadata for alloc %q task %q",
+				r.alloc.ID, name)
+			r.logger.Printf("[ERR] client: %v", err)
+			return err
+		}
+
 		task := &structs.Task{Name: name}
-		tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx, r.Alloc(),
-			task, r.vaultClient)
+		tr := NewTaskRunner(r.logger, r.config, r.setTaskState, td, r.Alloc(), task, r.vaultClient)
 		r.tasks[name] = tr
 
 		// Skip tasks in terminal states.
@@ -166,10 +196,7 @@ func (r *AllocRunner) RestoreState() error {
 
 // GetAllocDir returns the alloc dir for the alloc runner
 func (r *AllocRunner) GetAllocDir() *allocdir.AllocDir {
-	if r.ctx == nil {
-		return nil
-	}
-	return r.ctx.AllocDir
+	return r.allocDir
 }
 
 // SaveState is used to snapshot the state of the alloc runner
@@ -204,14 +231,14 @@ func (r *AllocRunner) saveAllocRunnerState() error {
 	allocClientDescription := r.allocClientDescription
 	r.allocLock.Unlock()
 
-	r.ctxLock.Lock()
-	ctx := r.ctx
-	r.ctxLock.Unlock()
+	r.allocDirLock.Lock()
+	allocDir := r.allocDir
+	r.allocDirLock.Unlock()
 
 	snap := allocRunnerState{
 		Version:                r.config.Version,
 		Alloc:                  alloc,
-		Context:                ctx,
+		AllocDir:               allocDir,
 		AllocClientStatus:      allocClientStatus,
 		AllocClientDescription: allocClientDescription,
 	}
@@ -233,7 +260,7 @@ func (r *AllocRunner) DestroyState() error {
 
 // DestroyContext is used to destroy the context
 func (r *AllocRunner) DestroyContext() error {
-	return r.ctx.AllocDir.Destroy()
+	return r.allocDir.Destroy()
 }
 
 // copyTaskStates returns a copy of the passed task states.
@@ -409,26 +436,27 @@ func (r *AllocRunner) Run() {
 	}
 
 	// Create the execution context
-	r.ctxLock.Lock()
-	if r.ctx == nil {
-		allocDir := allocdir.NewAllocDir(filepath.Join(r.config.AllocDir, r.alloc.ID))
-		if err := allocDir.Build(tg.Tasks); err != nil {
+	r.allocDirLock.Lock()
+	if r.allocDir == nil {
+		// Build allocation directory
+		r.allocDir = allocdir.NewAllocDir(r.logger, filepath.Join(r.config.AllocDir, r.alloc.ID))
+		if err := r.allocDir.Build(); err != nil {
 			r.logger.Printf("[WARN] client: failed to build task directories: %v", err)
 			r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup))
-			r.ctxLock.Unlock()
+			r.allocDirLock.Unlock()
 			return
 		}
-		r.ctx = driver.NewExecContext(allocDir, r.alloc.ID)
+
 		if r.otherAllocDir != nil {
-			if err := allocDir.Move(r.otherAllocDir, tg.Tasks); err != nil {
+			if err := r.allocDir.Move(r.otherAllocDir, tg.Tasks); err != nil {
 				r.logger.Printf("[ERROR] client: failed to move alloc dir into alloc %q: %v", r.alloc.ID, err)
 			}
 			if err := r.otherAllocDir.Destroy(); err != nil {
 				r.logger.Printf("[ERROR] client: error destroying allocdir %v", r.otherAllocDir.AllocDir, err)
 			}
 		}
 	}
-	r.ctxLock.Unlock()
+	r.allocDirLock.Unlock()
 
 	// Check if the allocation is in a terminal status. In this case, we don't
 	// start any of the task runners and directly wait for the destroy signal to
@@ -448,7 +476,11 @@ func (r *AllocRunner) Run() {
 			continue
 		}
 
-		tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx, r.Alloc(), task.Copy(), r.vaultClient)
+		r.allocDirLock.Lock()
+		taskdir := r.allocDir.NewTaskDir(task.Name)
+		r.allocDirLock.Unlock()
+
+		tr := NewTaskRunner(r.logger, r.config, r.setTaskState, taskdir, r.Alloc(), task.Copy(), r.vaultClient)
 		r.tasks[task.Name] = tr
 		tr.MarkReceived()