From 607e7f2ddee02daa2265ee337216a10c9bb7aae1 Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Fri, 4 Jan 2019 16:05:40 -0500 Subject: [PATCH 1/6] remove always false parameter Simplify allocDir.Build() function to avoid depending on client/structs, and remove a parameter that's always set to `false`. The motivation here is to avoid a dependency cycle between drivers/cstructs and alloc_dir. --- client/allocdir/alloc_dir_test.go | 13 ++++++------ client/allocdir/task_dir.go | 21 +++++++------------ client/allocdir/task_dir_test.go | 5 ++--- .../allocrunner/taskrunner/task_dir_hook.go | 2 +- 4 files changed, 16 insertions(+), 25 deletions(-) diff --git a/client/allocdir/alloc_dir_test.go b/client/allocdir/alloc_dir_test.go index ac9bad946cf9..6c39950b86f5 100644 --- a/client/allocdir/alloc_dir_test.go +++ b/client/allocdir/alloc_dir_test.go @@ -14,7 +14,6 @@ import ( "syscall" "testing" - cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad/structs" "github.com/stretchr/testify/require" @@ -112,11 +111,11 @@ func TestAllocDir_MountSharedAlloc(t *testing.T) { // Build 2 task dirs td1 := d.NewTaskDir(t1.Name) - if err := td1.Build(false, nil, cstructs.FSIsolationChroot); err != nil { + if err := td1.Build(true, nil); err != nil { t.Fatalf("error build task=%q dir: %v", t1.Name, err) } td2 := d.NewTaskDir(t2.Name) - if err := td2.Build(false, nil, cstructs.FSIsolationChroot); err != nil { + if err := td2.Build(true, nil); err != nil { t.Fatalf("error build task=%q dir: %v", t2.Name, err) } @@ -157,11 +156,11 @@ func TestAllocDir_Snapshot(t *testing.T) { // Build 2 task dirs td1 := d.NewTaskDir(t1.Name) - if err := td1.Build(false, nil, cstructs.FSIsolationImage); err != nil { + if err := td1.Build(false, nil); err != nil { t.Fatalf("error build task=%q dir: %v", t1.Name, err) } td2 := d.NewTaskDir(t2.Name) - if err := td2.Build(false, nil, cstructs.FSIsolationImage); err != nil { + if err := td2.Build(false, nil); err != nil { t.Fatalf("error build task=%q dir: %v", t2.Name, err) } @@ -249,7 +248,7 @@ func TestAllocDir_Move(t *testing.T) { defer d2.Destroy() td1 := d1.NewTaskDir(t1.Name) - if err := td1.Build(false, nil, cstructs.FSIsolationImage); err != nil { + if err := td1.Build(false, nil); err != nil { t.Fatalf("TaskDir.Build() faild: %v", err) } @@ -345,7 +344,7 @@ func TestAllocDir_ReadAt_SecretDir(t *testing.T) { defer d.Destroy() td := d.NewTaskDir(t1.Name) - if err := td.Build(false, nil, cstructs.FSIsolationImage); err != nil { + if err := td.Build(false, nil); err != nil { t.Fatalf("TaskDir.Build() failed: %v", err) } diff --git a/client/allocdir/task_dir.go b/client/allocdir/task_dir.go index 9c4a602ae9d1..9d99f9717542 100644 --- a/client/allocdir/task_dir.go +++ b/client/allocdir/task_dir.go @@ -7,7 +7,6 @@ import ( "path/filepath" hclog "github.com/hashicorp/go-hclog" - cstructs "github.com/hashicorp/nomad/client/structs" ) // TaskDir contains all of the paths relevant to a task. All paths are on the @@ -75,7 +74,7 @@ func (t *TaskDir) Copy() *TaskDir { // Build default directories and permissions in a task directory. chrootCreated // allows skipping chroot creation if the caller knows it has already been // done. -func (t *TaskDir) Build(chrootCreated bool, chroot map[string]string, fsi cstructs.FSIsolation) error { +func (t *TaskDir) Build(createChroot bool, chroot map[string]string) error { if err := os.MkdirAll(t.Dir, 0777); err != nil { return err } @@ -110,7 +109,7 @@ func (t *TaskDir) Build(chrootCreated bool, chroot map[string]string, fsi cstruc // Image based isolation will bind the shared alloc dir in the driver. // If there's no isolation the task will use the host path to the // shared alloc dir. - if fsi == cstructs.FSIsolationChroot { + if createChroot { // If the path doesn't exist OR it exists and is empty, link it empty, _ := pathEmpty(t.SharedTaskDir) if !pathExists(t.SharedTaskDir) || empty { @@ -130,8 +129,8 @@ func (t *TaskDir) Build(chrootCreated bool, chroot map[string]string, fsi cstruc } // Build chroot if chroot filesystem isolation is going to be used - if fsi == cstructs.FSIsolationChroot { - if err := t.buildChroot(chrootCreated, chroot); err != nil { + if createChroot { + if err := t.buildChroot(chroot); err != nil { return err } } @@ -142,15 +141,9 @@ func (t *TaskDir) Build(chrootCreated bool, chroot map[string]string, fsi cstruc // buildChroot takes a mapping of absolute directory or file paths on the host // to their intended, relative location within the task directory. This // attempts hardlink and then defaults to copying. If the path exists on the -// host and can't be embedded an error is returned. If chrootCreated is true -// skip expensive embedding operations and only ephemeral operations (eg -// mounting /dev) are done. -func (t *TaskDir) buildChroot(chrootCreated bool, entries map[string]string) error { - if !chrootCreated { - // Link/copy chroot entries - return t.embedDirs(entries) - } - return nil +// host and can't be embedded an error is returned. +func (t *TaskDir) buildChroot(entries map[string]string) error { + return t.embedDirs(entries) } func (t *TaskDir) embedDirs(entries map[string]string) error { diff --git a/client/allocdir/task_dir_test.go b/client/allocdir/task_dir_test.go index a4c1b7dc907e..b39c275f2ba1 100644 --- a/client/allocdir/task_dir_test.go +++ b/client/allocdir/task_dir_test.go @@ -6,7 +6,6 @@ import ( "path/filepath" "testing" - cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper/testlog" ) @@ -104,7 +103,7 @@ func TestTaskDir_NonRoot_Image(t *testing.T) { t.Fatalf("Build() failed: %v", err) } - if err := td.Build(false, nil, cstructs.FSIsolationImage); err != nil { + if err := td.Build(false, nil); err != nil { t.Fatalf("TaskDir.Build failed: %v", err) } } @@ -127,7 +126,7 @@ func TestTaskDir_NonRoot(t *testing.T) { t.Fatalf("Build() failed: %v", err) } - if err := td.Build(false, nil, cstructs.FSIsolationNone); err != nil { + if err := td.Build(false, nil); err != nil { t.Fatalf("TaskDir.Build failed: %v", err) } diff --git a/client/allocrunner/taskrunner/task_dir_hook.go b/client/allocrunner/taskrunner/task_dir_hook.go index 42c5ecc25182..9b18c8628758 100644 --- a/client/allocrunner/taskrunner/task_dir_hook.go +++ b/client/allocrunner/taskrunner/task_dir_hook.go @@ -44,7 +44,7 @@ func (h *taskDirHook) Prestart(ctx context.Context, req *interfaces.TaskPrestart // Build the task directory structure fsi := h.runner.driverCapabilities.FSIsolation - err := h.runner.taskDir.Build(false, chroot, fsi) + err := h.runner.taskDir.Build(fsi == cstructs.FSIsolationChroot, chroot) if err != nil { return err } From 2831088fc5cde8102cf6dd10dedad19a7ab0996e Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Fri, 4 Jan 2019 18:01:57 -0500 Subject: [PATCH 2/6] remove deprecated allocrunner --- client/allocrunnerdeprecated/alloc_runner.go | 1173 ---------- .../alloc_runner_health_watcher.go | 579 ----- .../alloc_runner_test.go | 1414 ------------ .../taskrunner/consul_template.go | 690 ------ .../taskrunner/consul_template_test.go | 1323 ----------- .../taskrunner/getters.go | 21 - .../taskrunner/task_runner.go | 1970 ---------------- .../taskrunner/task_runner_test.go | 2035 ----------------- .../taskrunner/task_runner_unix_test.go | 73 - client/allocrunnerdeprecated/testing.go | 70 - 10 files changed, 9348 deletions(-) delete mode 100644 client/allocrunnerdeprecated/alloc_runner.go delete mode 100644 client/allocrunnerdeprecated/alloc_runner_health_watcher.go delete mode 100644 client/allocrunnerdeprecated/alloc_runner_test.go delete mode 100644 client/allocrunnerdeprecated/taskrunner/consul_template.go delete mode 100644 client/allocrunnerdeprecated/taskrunner/consul_template_test.go delete mode 100644 client/allocrunnerdeprecated/taskrunner/getters.go delete mode 100644 client/allocrunnerdeprecated/taskrunner/task_runner.go delete mode 100644 client/allocrunnerdeprecated/taskrunner/task_runner_test.go delete mode 100644 client/allocrunnerdeprecated/taskrunner/task_runner_unix_test.go delete mode 100644 client/allocrunnerdeprecated/testing.go diff --git a/client/allocrunnerdeprecated/alloc_runner.go b/client/allocrunnerdeprecated/alloc_runner.go deleted file mode 100644 index 69912380da79..000000000000 --- a/client/allocrunnerdeprecated/alloc_runner.go +++ /dev/null @@ -1,1173 +0,0 @@ -// +build deprecated - -package allocrunner - -import ( - "context" - "fmt" - "log" - "path/filepath" - "sync" - "time" - - metrics "github.com/armon/go-metrics" - "github.com/boltdb/bolt" - "github.com/hashicorp/go-multierror" - "github.com/hashicorp/nomad/client/allocdir" - "github.com/hashicorp/nomad/client/allocrunnerdeprecated/taskrunner" - "github.com/hashicorp/nomad/client/allocwatcher" - "github.com/hashicorp/nomad/client/config" - consulApi "github.com/hashicorp/nomad/client/consul" - "github.com/hashicorp/nomad/client/vaultclient" - "github.com/hashicorp/nomad/helper" - "github.com/hashicorp/nomad/nomad/structs" - - cstructs "github.com/hashicorp/nomad/client/structs" -) - -var ( -// The following are the key paths written to the state database -//allocRunnerStateAllocKey = []byte("alloc") -//allocRunnerStateImmutableKey = []byte("immutable") -//allocRunnerStateMutableKey = []byte("mutable") -//allocRunnerStateAllocDirKey = []byte("alloc-dir") -) - -// AllocStateUpdater is used to update the status of an allocation -type AllocStateUpdater func(alloc *structs.Allocation) - -type AllocStatsReporter interface { - LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error) -} - -// AllocRunner is used to wrap an allocation and provide the execution context. -type AllocRunner struct { - config *config.Config - updater AllocStateUpdater - logger *log.Logger - - // allocID is the ID of this runner's allocation. Since it does not - // change for the lifetime of the AllocRunner it is safe to read - // without acquiring a lock (unlike alloc). - allocID string - - alloc *structs.Allocation - allocClientStatus string // Explicit status of allocation. Set when there are failures - allocClientDescription string - allocHealth *bool // Whether the allocation is healthy - allocHealthTime time.Time // Time at which allocation health has been set - allocBroadcast *cstructs.AllocBroadcaster - allocLock sync.Mutex - - dirtyCh chan struct{} - - allocDir *allocdir.AllocDir - allocDirLock sync.Mutex - - tasks map[string]*taskrunner.TaskRunner - taskStates map[string]*structs.TaskState - restored map[string]struct{} - taskLock sync.RWMutex - - taskStatusLock sync.RWMutex - - updateCh chan *structs.Allocation - - vaultClient vaultclient.VaultClient - consulClient consulApi.ConsulServiceAPI - - // prevAlloc allows for Waiting until a previous allocation exits and - // the migrates it data. If sticky volumes aren't used and there's no - // previous allocation a noop implementation is used so it always safe - // to call. - prevAlloc allocwatcher.PrevAllocWatcher - - // ctx is cancelled with exitFn to cause the alloc to be destroyed - // (stopped and GC'd). - ctx context.Context - exitFn context.CancelFunc - - // waitCh is closed when the Run method exits. At that point the alloc - // has stopped and been GC'd. - waitCh chan struct{} - - // State related fields - // stateDB is used to store the alloc runners state - stateDB *bolt.DB - allocStateLock sync.Mutex - - // persistedEval is the last persisted evaluation ID. Since evaluation - // IDs change on every allocation update we only need to persist the - // allocation when its eval ID != the last persisted eval ID. - persistedEvalLock sync.Mutex - persistedEval string - - // immutablePersisted and allocDirPersisted are used to track whether the - // immutable data and the alloc dir have been persisted. Once persisted we - // can lower write volume by not re-writing these values - immutablePersisted bool - allocDirPersisted bool - - // baseLabels are used when emitting tagged metrics. All alloc runner metrics - // will have these tags, and optionally more. - baseLabels []metrics.Label -} - -// allocRunnerAllocState is state that only has to be written when the alloc -// changes. -//type allocRunnerAllocState struct { -//Alloc *structs.Allocation -//} - -//// allocRunnerImmutableState is state that only has to be written once. -//type allocRunnerImmutableState struct { -//Version string -//} - -// allocRunnerMutableState is state that has to be written on each save as it -// changes over the life-cycle of the alloc_runner. -//type allocRunnerMutableState struct { -//AllocClientStatus string -//AllocClientDescription string -//TaskStates map[string]*structs.TaskState -//DeploymentStatus *structs.AllocDeploymentStatus -//} - -// NewAllocRunner is used to create a new allocation context -func NewAllocRunner(logger *log.Logger, config *config.Config, stateDB *bolt.DB, updater AllocStateUpdater, - alloc *structs.Allocation, vaultClient vaultclient.VaultClient, consulClient consulApi.ConsulServiceAPI, - prevAlloc allocwatcher.PrevAllocWatcher) *AllocRunner { - - ar := &AllocRunner{ - config: config, - stateDB: stateDB, - updater: updater, - logger: logger, - alloc: alloc, - allocID: alloc.ID, - //allocBroadcast: cstructs.NewAllocBroadcaster(8), - prevAlloc: prevAlloc, - dirtyCh: make(chan struct{}, 1), - //allocDir: allocdir.NewAllocDir(logger, filepath.Join(config.AllocDir, alloc.ID)), - tasks: make(map[string]*taskrunner.TaskRunner), - taskStates: copyTaskStates(alloc.TaskStates), - restored: make(map[string]struct{}), - updateCh: make(chan *structs.Allocation, 64), - waitCh: make(chan struct{}), - vaultClient: vaultClient, - consulClient: consulClient, - } - - // TODO Should be passed a context - ar.ctx, ar.exitFn = context.WithCancel(context.TODO()) - - return ar -} - -// setBaseLabels creates the set of base labels. This should be called after -// Restore has been called so the allocation is guaranteed to be loaded -func (r *AllocRunner) setBaseLabels() { - r.baseLabels = make([]metrics.Label, 0, 3) - - if r.alloc.Job != nil { - r.baseLabels = append(r.baseLabels, metrics.Label{ - Name: "job", - Value: r.alloc.Job.Name, - }) - } - if r.alloc.TaskGroup != "" { - r.baseLabels = append(r.baseLabels, metrics.Label{ - Name: "task_group", - Value: r.alloc.TaskGroup, - }) - } - if r.config != nil && r.config.Node != nil { - r.baseLabels = append(r.baseLabels, metrics.Label{ - Name: "node_id", - Value: r.config.Node.ID, - }) - } -} - -// pre060StateFilePath returns the path to our state file that would have been -// written pre v0.6.0 -// COMPAT: Remove in 0.7.0 -func (r *AllocRunner) pre060StateFilePath() string { - r.allocLock.Lock() - defer r.allocLock.Unlock() - path := filepath.Join(r.config.StateDir, "alloc", r.allocID, "state.json") - return path -} - -// RestoreState is used to restore the state of the alloc runner -func (r *AllocRunner) RestoreState() error { - //XXX Deprecated: see allocrunner - //err := r.stateDB.View(func(tx *bolt.Tx) error { - // bkt, err := state.GetAllocationBucket(tx, r.allocID) - // if err != nil { - // return fmt.Errorf("failed to get allocation bucket: %v", err) - // } - - // // Get the state objects - // var mutable allocRunnerMutableState - // var immutable allocRunnerImmutableState - // var allocState allocRunnerAllocState - // var allocDir allocdir.AllocDir - - // if err := state.GetObject(bkt, allocRunnerStateAllocKey, &allocState); err != nil { - // return fmt.Errorf("failed to read alloc runner alloc state: %v", err) - // } - // if err := state.GetObject(bkt, allocRunnerStateImmutableKey, &immutable); err != nil { - // return fmt.Errorf("failed to read alloc runner immutable state: %v", err) - // } - // if err := state.GetObject(bkt, allocRunnerStateMutableKey, &mutable); err != nil { - // return fmt.Errorf("failed to read alloc runner mutable state: %v", err) - // } - // if err := state.GetObject(bkt, allocRunnerStateAllocDirKey, &allocDir); err != nil { - // return fmt.Errorf("failed to read alloc runner alloc_dir state: %v", err) - // } - - // // Populate the fields - // r.alloc = allocState.Alloc - // r.allocDir = &allocDir - // r.allocClientStatus = mutable.AllocClientStatus - // r.allocClientDescription = mutable.AllocClientDescription - // r.taskStates = mutable.TaskStates - // r.alloc.ClientStatus = getClientStatus(r.taskStates) - // r.alloc.DeploymentStatus = mutable.DeploymentStatus - // return nil - //}) - - //if err != nil { - // return fmt.Errorf("failed to read allocation state: %v", err) - //} - - var snapshotErrors multierror.Error - if r.alloc == nil { - snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil allocation")) - } - if r.allocDir == nil { - snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil alloc dir")) - } - if e := snapshotErrors.ErrorOrNil(); e != nil { - return e - } - - tg := r.alloc.Job.LookupTaskGroup(r.alloc.TaskGroup) - if tg == nil { - return fmt.Errorf("restored allocation doesn't contain task group %q", r.alloc.TaskGroup) - } - - // Restore the task runners - taskDestroyEvent := structs.NewTaskEvent(structs.TaskKilled) - var mErr multierror.Error - for _, task := range tg.Tasks { - name := task.Name - state := r.taskStates[name] - - // Nomad exited before task could start, nothing to restore. - // AllocRunner.Run will start a new TaskRunner for this task - if state == nil { - continue - } - - // Mark the task as restored. - r.restored[name] = struct{}{} - - td, ok := r.allocDir.TaskDirs[name] - if !ok { - // Create the task dir metadata if it doesn't exist. - // Since task dirs are created during r.Run() the - // client may save state and exit before all task dirs - // are created - td = r.allocDir.NewTaskDir(name) - } - - // Skip tasks in terminal states. - if state.State == structs.TaskStateDead { - continue - } - - tr := taskrunner.NewTaskRunner(r.logger, r.config, r.stateDB, r.setTaskState, td, r.Alloc(), task, r.vaultClient, r.consulClient) - r.tasks[name] = tr - - if restartReason, err := tr.RestoreState(); err != nil { - r.logger.Printf("[ERR] client: failed to restore state for alloc %s task %q: %v", r.allocID, name, err) - mErr.Errors = append(mErr.Errors, err) - } else if !r.alloc.TerminalStatus() { - // Only start if the alloc isn't in a terminal status. - go tr.Run() - - // Restart task runner if RestoreState gave a reason - if restartReason != "" { - r.logger.Printf("[INFO] client: restarting alloc %s task %s: %v", r.allocID, name, restartReason) - const failure = false - tr.Restart("upgrade", restartReason, failure) - } - } else { - // XXX This does nothing and is broken since the task runner is not - // running yet, and there is nothing listening to the destroy ch. - // XXX When a single task is dead in the allocation we should kill - // all the task. This currently does NOT happen. Re-enable test: - // TestAllocRunner_TaskLeader_StopRestoredTG - tr.Destroy(taskDestroyEvent) - } - } - - return mErr.ErrorOrNil() -} - -// SaveState is used to snapshot the state of the alloc runner -// if the fullSync is marked as false only the state of the Alloc Runner -// is snapshotted. If fullSync is marked as true, we snapshot -// all the Task Runners associated with the Alloc -func (r *AllocRunner) SaveState() error { - if err := r.saveAllocRunnerState(); err != nil { - return err - } - - // Save state for each task - runners := r.getTaskRunners() - var mErr multierror.Error - for _, tr := range runners { - if err := tr.SaveState(); err != nil { - mErr.Errors = append(mErr.Errors, fmt.Errorf("failed to save state for alloc %s task %q: %v", - r.allocID, tr.Name(), err)) - } - } - return mErr.ErrorOrNil() -} - -func (r *AllocRunner) saveAllocRunnerState() error { - r.allocStateLock.Lock() - defer r.allocStateLock.Unlock() - - if r.ctx.Err() == context.Canceled { - return nil - } - - //XXX Deprecated: see allocrunner - return nil - - //// Grab all the relevant data - //alloc := r.Alloc() - - //r.allocLock.Lock() - //allocClientStatus := r.allocClientStatus - //allocClientDescription := r.allocClientDescription - //r.allocLock.Unlock() - - //r.allocDirLock.Lock() - //allocDir := r.allocDir.Copy() - //r.allocDirLock.Unlock() - - //// Start the transaction. - //return r.stateDB.Batch(func(tx *bolt.Tx) error { - - // // Grab the allocation bucket - // allocBkt, err := state.GetAllocationBucket(tx, r.allocID) - // if err != nil { - // return fmt.Errorf("failed to retrieve allocation bucket: %v", err) - // } - - // // Write the allocation if the eval has changed - // r.persistedEvalLock.Lock() - // lastPersisted := r.persistedEval - // r.persistedEvalLock.Unlock() - // if alloc.EvalID != lastPersisted { - // allocState := &allocRunnerAllocState{ - // Alloc: alloc, - // } - - // if err := state.PutObject(allocBkt, allocRunnerStateAllocKey, &allocState); err != nil { - // return fmt.Errorf("failed to write alloc_runner alloc state: %v", err) - // } - - // tx.OnCommit(func() { - // r.persistedEvalLock.Lock() - // r.persistedEval = alloc.EvalID - // r.persistedEvalLock.Unlock() - // }) - // } - - // // Write immutable data iff it hasn't been written yet - // if !r.immutablePersisted { - // immutable := &allocRunnerImmutableState{ - // Version: r.config.Version.VersionNumber(), - // } - - // if err := state.PutObject(allocBkt, allocRunnerStateImmutableKey, &immutable); err != nil { - // return fmt.Errorf("failed to write alloc_runner immutable state: %v", err) - // } - - // tx.OnCommit(func() { - // r.immutablePersisted = true - // }) - // } - - // // Write the alloc dir data if it hasn't been written before and it exists. - // if !r.allocDirPersisted && allocDir != nil { - // if err := state.PutObject(allocBkt, allocRunnerStateAllocDirKey, allocDir); err != nil { - // return fmt.Errorf("failed to write alloc_runner allocDir state: %v", err) - // } - - // tx.OnCommit(func() { - // r.allocDirPersisted = true - // }) - // } - - // // Write the mutable state every time - // mutable := &allocRunnerMutableState{ - // AllocClientStatus: allocClientStatus, - // AllocClientDescription: allocClientDescription, - // TaskStates: alloc.TaskStates, - // DeploymentStatus: alloc.DeploymentStatus, - // } - - // if err := state.PutObject(allocBkt, allocRunnerStateMutableKey, &mutable); err != nil { - // return fmt.Errorf("failed to write alloc_runner mutable state: %v", err) - // } - - // return nil - //}) -} - -// DestroyState is used to cleanup after ourselves -func (r *AllocRunner) DestroyState() error { - //r.allocStateLock.Lock() - //defer r.allocStateLock.Unlock() - - //return r.stateDB.Update(func(tx *bolt.Tx) error { - // if err := state.DeleteAllocationBucket(tx, r.allocID); err != nil { - // return fmt.Errorf("failed to delete allocation bucket: %v", err) - // } - // return nil - //}) - panic("deprecated: use allocrunner") -} - -// DestroyContext is used to destroy the context -func (r *AllocRunner) DestroyContext() error { - return r.allocDir.Destroy() -} - -// GetAllocDir returns the alloc dir for the alloc runner -func (r *AllocRunner) GetAllocDir() *allocdir.AllocDir { - return r.allocDir -} - -// GetListener returns a listener for updates broadcast by this alloc runner. -// Callers are responsible for calling Close on their Listener. -func (r *AllocRunner) GetListener() *cstructs.AllocListener { - return r.allocBroadcast.Listen() -} - -// copyTaskStates returns a copy of the passed task states. -func copyTaskStates(states map[string]*structs.TaskState) map[string]*structs.TaskState { - copy := make(map[string]*structs.TaskState, len(states)) - for task, state := range states { - copy[task] = state.Copy() - } - return copy -} - -// finalizeTerminalAlloc sets any missing required fields like -// finishedAt in the alloc runner's task States. finishedAt is used -// to calculate reschedule time for failed allocs, so we make sure that -// it is set -func (r *AllocRunner) finalizeTerminalAlloc(alloc *structs.Allocation) { - if !alloc.ClientTerminalStatus() { - return - } - r.taskStatusLock.Lock() - defer r.taskStatusLock.Unlock() - - group := alloc.Job.LookupTaskGroup(alloc.TaskGroup) - if r.taskStates == nil { - r.taskStates = make(map[string]*structs.TaskState) - } - now := time.Now() - for _, task := range group.Tasks { - ts, ok := r.taskStates[task.Name] - if !ok { - ts = &structs.TaskState{} - r.taskStates[task.Name] = ts - } - if ts.FinishedAt.IsZero() { - ts.FinishedAt = now - } - } - alloc.TaskStates = copyTaskStates(r.taskStates) -} - -// Alloc returns the associated allocation -func (r *AllocRunner) Alloc() *structs.Allocation { - r.allocLock.Lock() - - // Don't do a deep copy of the job - alloc := r.alloc.CopySkipJob() - - // The status has explicitly been set. - if r.allocClientStatus != "" || r.allocClientDescription != "" { - alloc.ClientStatus = r.allocClientStatus - alloc.ClientDescription = r.allocClientDescription - - // Copy over the task states so we don't lose them - r.taskStatusLock.RLock() - alloc.TaskStates = copyTaskStates(r.taskStates) - r.taskStatusLock.RUnlock() - - r.allocLock.Unlock() - r.finalizeTerminalAlloc(alloc) - return alloc - } - - // The health has been set - if r.allocHealth != nil { - if alloc.DeploymentStatus == nil { - alloc.DeploymentStatus = &structs.AllocDeploymentStatus{} - } - alloc.DeploymentStatus.Healthy = helper.BoolToPtr(*r.allocHealth) - alloc.DeploymentStatus.Timestamp = r.allocHealthTime - } - r.allocLock.Unlock() - - // Scan the task states to determine the status of the alloc - r.taskStatusLock.RLock() - alloc.TaskStates = copyTaskStates(r.taskStates) - alloc.ClientStatus = getClientStatus(r.taskStates) - r.taskStatusLock.RUnlock() - - // If the client status is failed and we are part of a deployment, mark the - // alloc as unhealthy. This guards against the watcher not be started. - r.allocLock.Lock() - if alloc.ClientStatus == structs.AllocClientStatusFailed && - alloc.DeploymentID != "" && !alloc.DeploymentStatus.IsUnhealthy() { - alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ - Healthy: helper.BoolToPtr(false), - } - } - r.allocLock.Unlock() - r.finalizeTerminalAlloc(alloc) - return alloc -} - -// getClientStatus takes in the task states for a given allocation and computes -// the client status -func getClientStatus(taskStates map[string]*structs.TaskState) string { - var pending, running, dead, failed bool - for _, state := range taskStates { - switch state.State { - case structs.TaskStateRunning: - running = true - case structs.TaskStatePending: - pending = true - case structs.TaskStateDead: - if state.Failed { - failed = true - } else { - dead = true - } - } - } - - // Determine the alloc status - if failed { - return structs.AllocClientStatusFailed - } else if running { - return structs.AllocClientStatusRunning - } else if pending { - return structs.AllocClientStatusPending - } else if dead { - return structs.AllocClientStatusComplete - } - - return "" -} - -// dirtySyncState is used to watch for state being marked dirty to sync -func (r *AllocRunner) dirtySyncState() { - for { - select { - case <-r.dirtyCh: - if err := r.syncStatus(); err != nil { - // Only WARN instead of ERR because we continue on - r.logger.Printf("[WARN] client: error persisting alloc %q state: %v", - r.allocID, err) - } - case <-r.ctx.Done(): - return - } - } -} - -// syncStatus is used to run and sync the status when it changes -func (r *AllocRunner) syncStatus() error { - // Get a copy of our alloc, update status server side and sync to disk - alloc := r.Alloc() - r.updater(alloc) - r.sendBroadcast(alloc) - return r.saveAllocRunnerState() -} - -// sendBroadcast broadcasts an alloc update. -func (r *AllocRunner) sendBroadcast(alloc *structs.Allocation) { - // Try to send the alloc up to three times with a delay to allow recovery. - sent := false - for i := 0; i < 3; i++ { - //if sent = r.allocBroadcast.Send(alloc); sent { - // break - //} - time.Sleep(500 * time.Millisecond) - } - if !sent { - r.logger.Printf("[WARN] client: failed to broadcast update to allocation %q", r.allocID) - } -} - -// setStatus is used to update the allocation status -func (r *AllocRunner) setStatus(status, desc string) { - r.allocLock.Lock() - r.allocClientStatus = status - r.allocClientDescription = desc - r.allocLock.Unlock() - select { - case r.dirtyCh <- struct{}{}: - default: - } -} - -// setTaskState is used to set the status of a task. If lazySync is set then the -// event is appended but not synced with the server. If state is omitted, the -// last known state is used. -func (r *AllocRunner) setTaskState(taskName, state string, event *structs.TaskEvent, lazySync bool) { - r.taskStatusLock.Lock() - defer r.taskStatusLock.Unlock() - taskState, ok := r.taskStates[taskName] - if !ok { - taskState = &structs.TaskState{} - r.taskStates[taskName] = taskState - } - - // Set the tasks state. - if event != nil { - if event.FailsTask { - taskState.Failed = true - } - if event.Type == structs.TaskRestarting { - if !r.config.DisableTaggedMetrics { - metrics.IncrCounterWithLabels([]string{"client", "allocs", "restart"}, - 1, r.baseLabels) - } - if r.config.BackwardsCompatibleMetrics { - metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "restart"}, 1) - } - taskState.Restarts++ - taskState.LastRestart = time.Unix(0, event.Time) - } - r.appendTaskEvent(taskState, event) - } - - if lazySync { - return - } - - // If the state hasn't been set use the existing state. - if state == "" { - state = taskState.State - if taskState.State == "" { - state = structs.TaskStatePending - } - } - - switch state { - case structs.TaskStateRunning: - // Capture the start time if it is just starting - if taskState.State != structs.TaskStateRunning { - taskState.StartedAt = time.Now().UTC() - if !r.config.DisableTaggedMetrics { - metrics.IncrCounterWithLabels([]string{"client", "allocs", "running"}, - 1, r.baseLabels) - } - if r.config.BackwardsCompatibleMetrics { - metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "running"}, 1) - } - } - case structs.TaskStateDead: - // Capture the finished time if not already set - if taskState.FinishedAt.IsZero() { - taskState.FinishedAt = time.Now().UTC() - } - - // Find all tasks that are not the one that is dead and check if the one - // that is dead is a leader - var otherTaskRunners []*taskrunner.TaskRunner - var otherTaskNames []string - leader := false - for task, tr := range r.tasks { - if task != taskName { - otherTaskRunners = append(otherTaskRunners, tr) - otherTaskNames = append(otherTaskNames, task) - } else if tr.IsLeader() { - leader = true - } - } - - // Emitting metrics to indicate task complete and failures - if taskState.Failed { - if !r.config.DisableTaggedMetrics { - metrics.IncrCounterWithLabels([]string{"client", "allocs", "failed"}, - 1, r.baseLabels) - } - if r.config.BackwardsCompatibleMetrics { - metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "failed"}, 1) - } - } else { - if !r.config.DisableTaggedMetrics { - metrics.IncrCounterWithLabels([]string{"client", "allocs", "complete"}, - 1, r.baseLabels) - } - if r.config.BackwardsCompatibleMetrics { - metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "complete"}, 1) - } - } - - // If the task failed, we should kill all the other tasks in the task group. - if taskState.Failed { - for _, tr := range otherTaskRunners { - tr.Destroy(structs.NewTaskEvent(structs.TaskSiblingFailed).SetFailedSibling(taskName)) - } - if len(otherTaskRunners) > 0 { - r.logger.Printf("[DEBUG] client: task %q failed, destroying other tasks in task group: %v", taskName, otherTaskNames) - } - } else if leader { - // If the task was a leader task we should kill all the other tasks. - for _, tr := range otherTaskRunners { - tr.Destroy(structs.NewTaskEvent(structs.TaskLeaderDead)) - } - if len(otherTaskRunners) > 0 { - r.logger.Printf("[DEBUG] client: leader task %q is dead, destroying other tasks in task group: %v", taskName, otherTaskNames) - } - } - } - - // Store the new state - taskState.State = state - - select { - case r.dirtyCh <- struct{}{}: - default: - } -} - -// appendTaskEvent updates the task status by appending the new event. -func (r *AllocRunner) appendTaskEvent(state *structs.TaskState, event *structs.TaskEvent) { - capacity := 10 - if state.Events == nil { - state.Events = make([]*structs.TaskEvent, 0, capacity) - } - - // If we hit capacity, then shift it. - if len(state.Events) == capacity { - old := state.Events - state.Events = make([]*structs.TaskEvent, 0, capacity) - state.Events = append(state.Events, old[1:]...) - } - - state.Events = append(state.Events, event) -} - -// Run is a long running goroutine used to manage an allocation -func (r *AllocRunner) Run() { - defer close(r.waitCh) - r.setBaseLabels() - go r.dirtySyncState() - - // Find the task group to run in the allocation - alloc := r.Alloc() - tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) - if tg == nil { - r.logger.Printf("[ERR] client: alloc %q for missing task group %q", r.allocID, alloc.TaskGroup) - r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("missing task group '%s'", alloc.TaskGroup)) - return - } - - // Build allocation directory (idempotent) - r.allocDirLock.Lock() - err := r.allocDir.Build() - r.allocDirLock.Unlock() - - if err != nil { - r.logger.Printf("[ERR] client: alloc %q failed to build task directories: %v", r.allocID, err) - r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup)) - return - } - - // Wait for a previous alloc - if any - to terminate - if err := r.prevAlloc.Wait(r.ctx); err != nil { - if err == context.Canceled { - return - } - r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("error while waiting for previous alloc to terminate: %v", err)) - return - } - - // Wait for data to be migrated from a previous alloc if applicable - if err := r.prevAlloc.Migrate(r.ctx, r.allocDir); err != nil { - if err == context.Canceled { - return - } - - // Soft-fail on migration errors - r.logger.Printf("[WARN] client: alloc %q error while migrating data from previous alloc: %v", r.allocID, err) - - // Recreate alloc dir to ensure a clean slate - r.allocDir.Destroy() - if err := r.allocDir.Build(); err != nil { - r.logger.Printf("[ERR] client: alloc %q failed to clean task directories after failed migration: %v", r.allocID, err) - r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to rebuild task dirs for '%s'", alloc.TaskGroup)) - return - } - } - - // Check if the allocation is in a terminal status. In this case, we don't - // start any of the task runners and directly wait for the destroy signal to - // clean up the allocation. - if alloc.TerminalStatus() { - r.logger.Printf("[DEBUG] client: alloc %q in terminal status, waiting for destroy", r.allocID) - // mark this allocation as completed if it is not already in a - // terminal state - if !alloc.Terminated() { - r.setStatus(structs.AllocClientStatusComplete, "canceled running tasks for allocation in terminal state") - } - r.handleDestroy() - r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.allocID) - return - } - - // Increment alloc runner start counter. Incr'd even when restoring existing tasks so 1 start != 1 task execution - if !r.config.DisableTaggedMetrics { - metrics.IncrCounterWithLabels([]string{"client", "allocs", "start"}, - 1, r.baseLabels) - } - if r.config.BackwardsCompatibleMetrics { - metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, "start"}, 1) - } - - // Start the watcher - wCtx, watcherCancel := context.WithCancel(r.ctx) - go r.watchHealth(wCtx) - - // Start the task runners - r.logger.Printf("[DEBUG] client: starting task runners for alloc '%s'", r.allocID) - r.taskLock.Lock() - for _, task := range tg.Tasks { - if _, ok := r.restored[task.Name]; ok { - continue - } - - r.allocDirLock.Lock() - taskdir := r.allocDir.NewTaskDir(task.Name) - r.allocDirLock.Unlock() - - tr := taskrunner.NewTaskRunner(r.logger, r.config, r.stateDB, r.setTaskState, taskdir, r.Alloc(), task.Copy(), r.vaultClient, r.consulClient) - r.tasks[task.Name] = tr - tr.MarkReceived() - - go tr.Run() - } - r.taskLock.Unlock() - - // taskDestroyEvent contains an event that caused the destruction of a task - // in the allocation. - var taskDestroyEvent *structs.TaskEvent - -OUTER: - // Wait for updates - for { - select { - case update := <-r.updateCh: - // Store the updated allocation. - r.allocLock.Lock() - - // If the deployment ids have changed clear the health - if r.alloc.DeploymentID != update.DeploymentID { - r.allocHealth = nil - r.allocHealthTime = time.Time{} - } - - r.alloc = update - r.allocLock.Unlock() - - // Create a new watcher - watcherCancel() - wCtx, watcherCancel = context.WithCancel(r.ctx) - go r.watchHealth(wCtx) - - // Check if we're in a terminal status - if update.TerminalStatus() { - taskDestroyEvent = structs.NewTaskEvent(structs.TaskKilled) - break OUTER - } - - // Update the task groups - runners := r.getTaskRunners() - for _, tr := range runners { - tr.Update(update) - } - - if err := r.syncStatus(); err != nil { - r.logger.Printf("[WARN] client: failed to sync alloc %q status upon receiving alloc update: %v", - r.allocID, err) - } - - case <-r.ctx.Done(): - taskDestroyEvent = structs.NewTaskEvent(structs.TaskKilled) - break OUTER - } - } - - // Kill the task runners - r.destroyTaskRunners(taskDestroyEvent) - - // Block until we should destroy the state of the alloc - r.handleDestroy() - - // Free up the context. It has likely exited already - watcherCancel() - - r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.allocID) -} - -// destroyTaskRunners destroys the task runners, waits for them to terminate and -// then saves state. -func (r *AllocRunner) destroyTaskRunners(destroyEvent *structs.TaskEvent) { - // First destroy the leader if one exists - tg := r.alloc.Job.LookupTaskGroup(r.alloc.TaskGroup) - leader := "" - for _, task := range tg.Tasks { - if task.Leader { - leader = task.Name - break - } - } - if leader != "" { - r.taskLock.RLock() - tr := r.tasks[leader] - r.taskLock.RUnlock() - - // Dead tasks don't have a task runner created so guard against - // the leader being dead when this AR was saved. - if tr == nil { - r.logger.Printf("[DEBUG] client: alloc %q leader task %q of task group %q already stopped", - r.allocID, leader, r.alloc.TaskGroup) - } else { - r.logger.Printf("[DEBUG] client: alloc %q destroying leader task %q of task group %q first", - r.allocID, leader, r.alloc.TaskGroup) - tr.Destroy(destroyEvent) - <-tr.WaitCh() - } - } - - // Then destroy non-leader tasks concurrently - r.taskLock.RLock() - for name, tr := range r.tasks { - if name != leader { - tr.Destroy(destroyEvent) - } - } - r.taskLock.RUnlock() - - // Wait for termination of the task runners - for _, tr := range r.getTaskRunners() { - <-tr.WaitCh() - } -} - -// handleDestroy blocks till the AllocRunner should be destroyed and does the -// necessary cleanup. -func (r *AllocRunner) handleDestroy() { - // Final state sync. We do this to ensure that the server has the correct - // state as we wait for a destroy. - alloc := r.Alloc() - - // Increment the destroy count for this alloc runner since this allocation is being removed from this client. - if !r.config.DisableTaggedMetrics { - metrics.IncrCounterWithLabels([]string{"client", "allocs", "destroy"}, - 1, r.baseLabels) - } - if r.config.BackwardsCompatibleMetrics { - metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, "destroy"}, 1) - } - - // Broadcast and persist state synchronously - r.sendBroadcast(alloc) - if err := r.saveAllocRunnerState(); err != nil { - r.logger.Printf("[WARN] client: alloc %q unable to persist state but should be GC'd soon anyway:%v", - r.allocID, err) - } - - // Unmount any mounted directories as no tasks are running and makes - // cleaning up Nomad's data directory simpler. - if err := r.allocDir.UnmountAll(); err != nil { - r.logger.Printf("[ERR] client: alloc %q unable unmount task directories: %v", r.allocID, err) - } - - // Update the server with the alloc's status -- also marks the alloc as - // being eligible for GC, so from this point on the alloc can be gc'd - // at any time. - r.updater(alloc) - - for { - select { - case <-r.ctx.Done(): - if err := r.DestroyContext(); err != nil { - r.logger.Printf("[ERR] client: failed to destroy context for alloc '%s': %v", - r.allocID, err) - } - if err := r.DestroyState(); err != nil { - r.logger.Printf("[ERR] client: failed to destroy state for alloc '%s': %v", - r.allocID, err) - } - - return - case <-r.updateCh: - r.logger.Printf("[DEBUG] client: dropping update to terminal alloc '%s'", r.allocID) - } - } -} - -// IsWaiting returns true if this alloc is waiting on a previous allocation to -// terminate. -func (r *AllocRunner) IsWaiting() bool { - return r.prevAlloc.IsWaiting() -} - -// IsMigrating returns true if this alloc is migrating data from a previous -// allocation. -func (r *AllocRunner) IsMigrating() bool { - return r.prevAlloc.IsMigrating() -} - -// Update is used to update the allocation of the context -func (r *AllocRunner) Update(update *structs.Allocation) { - select { - case r.updateCh <- update: - default: - r.logger.Printf("[ERR] client: dropping update to alloc '%s'", update.ID) - } -} - -// StatsReporter returns an interface to query resource usage statistics of an -// allocation -func (r *AllocRunner) StatsReporter() AllocStatsReporter { - return r -} - -// getTaskRunners is a helper that returns a copy of the task runners list using -// the taskLock. -func (r *AllocRunner) getTaskRunners() []*taskrunner.TaskRunner { - // Get the task runners - r.taskLock.RLock() - defer r.taskLock.RUnlock() - runners := make([]*taskrunner.TaskRunner, 0, len(r.tasks)) - for _, tr := range r.tasks { - runners = append(runners, tr) - } - return runners -} - -// LatestAllocStats returns the latest allocation stats. If the optional taskFilter is set -// the allocation stats will only include the given task. -func (r *AllocRunner) LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error) { - astat := &cstructs.AllocResourceUsage{ - Tasks: make(map[string]*cstructs.TaskResourceUsage), - } - - var flat []*cstructs.TaskResourceUsage - if taskFilter != "" { - r.taskLock.RLock() - tr, ok := r.tasks[taskFilter] - r.taskLock.RUnlock() - if !ok { - return nil, fmt.Errorf("allocation %q has no task %q", r.allocID, taskFilter) - } - l := tr.LatestResourceUsage() - if l != nil { - astat.Tasks[taskFilter] = l - flat = []*cstructs.TaskResourceUsage{l} - astat.Timestamp = l.Timestamp - } - } else { - // Get the task runners - runners := r.getTaskRunners() - for _, tr := range runners { - l := tr.LatestResourceUsage() - if l != nil { - astat.Tasks[tr.Name()] = l - flat = append(flat, l) - if l.Timestamp > astat.Timestamp { - astat.Timestamp = l.Timestamp - } - } - } - } - - astat.ResourceUsage = sumTaskResourceUsage(flat) - return astat, nil -} - -// sumTaskResourceUsage takes a set of task resources and sums their resources -func sumTaskResourceUsage(usages []*cstructs.TaskResourceUsage) *cstructs.ResourceUsage { - summed := &cstructs.ResourceUsage{ - MemoryStats: &cstructs.MemoryStats{}, - CpuStats: &cstructs.CpuStats{}, - } - for _, usage := range usages { - summed.Add(usage.ResourceUsage) - } - return summed -} - -// ShouldUpdate takes the AllocModifyIndex of an allocation sent from the server and -// checks if the current running allocation is behind and should be updated. -func (r *AllocRunner) ShouldUpdate(serverIndex uint64) bool { - r.allocLock.Lock() - defer r.allocLock.Unlock() - return r.alloc.AllocModifyIndex < serverIndex -} - -// Destroy is used to indicate that the allocation context should be destroyed -func (r *AllocRunner) Destroy() { - // Lock when closing the context as that gives the save state code - // serialization. - r.allocStateLock.Lock() - defer r.allocStateLock.Unlock() - - r.exitFn() - r.allocBroadcast.Close() -} - -// IsDestroyed returns true if the AllocRunner is not running and has been -// destroyed (GC'd). -func (r *AllocRunner) IsDestroyed() bool { - select { - case <-r.waitCh: - return true - default: - return false - } -} - -// WaitCh returns a channel to wait for termination -func (r *AllocRunner) WaitCh() <-chan struct{} { - return r.waitCh -} - -// AllocID returns the allocation ID of the allocation being run -func (r *AllocRunner) AllocID() string { - if r == nil { - return "" - } - return r.allocID -} diff --git a/client/allocrunnerdeprecated/alloc_runner_health_watcher.go b/client/allocrunnerdeprecated/alloc_runner_health_watcher.go deleted file mode 100644 index a457256e6413..000000000000 --- a/client/allocrunnerdeprecated/alloc_runner_health_watcher.go +++ /dev/null @@ -1,579 +0,0 @@ -// +build deprecated - -package allocrunner - -import ( - "context" - "fmt" - "log" - "strings" - "sync" - "time" - - "github.com/hashicorp/consul/api" - consulApi "github.com/hashicorp/nomad/client/consul" - cstructs "github.com/hashicorp/nomad/client/structs" - "github.com/hashicorp/nomad/command/agent/consul" - "github.com/hashicorp/nomad/helper" - "github.com/hashicorp/nomad/nomad/structs" -) - -const ( - // consulCheckLookupInterval is the interval at which we check if the - // Consul checks are healthy or unhealthy. - consulCheckLookupInterval = 500 * time.Millisecond - - // allocHealthEventSource is the source used for emitting task events - allocHealthEventSource = "Alloc Unhealthy" -) - -// watchHealth is responsible for watching an allocation's task status and -// potentially Consul health check status to determine if the allocation is -// healthy or unhealthy. -func (r *AllocRunner) watchHealth(ctx context.Context) { - - // See if we should watch the allocs health - alloc := r.Alloc() - - // Neither deployments nor migrations care about the health of - // non-service jobs so never watch their health - if alloc.Job.Type != structs.JobTypeService { - return - } - - // No need to watch health as it's already set - if alloc.DeploymentStatus.HasHealth() { - return - } - - tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) - if tg == nil { - r.logger.Printf("[ERR] client.alloc_watcher: failed to lookup allocation %q task group %q. Exiting watcher", - alloc.ID, alloc.TaskGroup) - return - } - - isDeploy := alloc.DeploymentID != "" - - // No need to watch allocs for deployments that rely on operators - // manually setting health - if isDeploy && (tg.Update == nil || tg.Update.HealthCheck == structs.UpdateStrategyHealthCheck_Manual) { - return - } - - // Get an allocation listener to watch for alloc events - l := r.allocBroadcast.Listen() - defer l.Close() - - // Define the deadline, health method, min healthy time from the - // deployment if this is a deployment; otherwise from the migration - // strategy. - var deadline time.Time - var useChecks bool - var minHealthyTime time.Duration - - if isDeploy { - deadline = time.Now().Add(tg.Update.HealthyDeadline) - minHealthyTime = tg.Update.MinHealthyTime - useChecks = tg.Update.HealthCheck == structs.UpdateStrategyHealthCheck_Checks - } else { - strategy := tg.Migrate - if strategy == nil { - // For backwards compat with pre-0.8 allocations that - // don't have a migrate strategy set. - strategy = structs.DefaultMigrateStrategy() - } - deadline = time.Now().Add(strategy.HealthyDeadline) - minHealthyTime = strategy.MinHealthyTime - useChecks = strategy.HealthCheck == structs.MigrateStrategyHealthChecks - } - - // Create a new context with the health deadline - healthCtx, healthCtxCancel := context.WithDeadline(ctx, deadline) - defer healthCtxCancel() - r.logger.Printf("[DEBUG] client.alloc_watcher: deadline for alloc %q is at %v (deploy=%t checks=%t)", alloc.ID, deadline, isDeploy, useChecks) - - // Create the health tracker object - tracker := newAllocHealthTracker(healthCtx, r.logger, alloc, l, r.consulClient, minHealthyTime, useChecks) - tracker.Start() - - allocHealthy := false - select { - case <-healthCtx.Done(): - // We were cancelled which means we are no longer needed - if healthCtx.Err() == context.Canceled { - return - } - - // Since the deadline has been reached we are not healthy - case <-tracker.AllocStoppedCh(): - // The allocation was stopped so nothing to do - return - case healthy := <-tracker.HealthyCh(): - allocHealthy = healthy - } - - r.allocLock.Lock() - r.allocHealth = helper.BoolToPtr(allocHealthy) - r.allocHealthTime = time.Now() - r.allocLock.Unlock() - - // If deployment is unhealthy emit task events explaining why - if !allocHealthy && isDeploy { - r.taskLock.RLock() - for task, event := range tracker.TaskEvents() { - if tr, ok := r.tasks[task]; ok { - tr.EmitEvent(allocHealthEventSource, event) - } - } - r.taskLock.RUnlock() - } - - r.syncStatus() -} - -// allocHealthTracker tracks the health of an allocation and makes health events -// watchable via channels. -type allocHealthTracker struct { - // logger is used to log - logger *log.Logger - - // ctx and cancelFn is used to shutdown the tracker - ctx context.Context - cancelFn context.CancelFunc - - // alloc is the alloc we are tracking - alloc *structs.Allocation - - // tg is the task group we are tracking - tg *structs.TaskGroup - - // minHealthyTime is the duration an alloc must remain healthy to be - // considered healthy - minHealthyTime time.Duration - - // useChecks specifies whether to use Consul healh checks or not - useChecks bool - - // consulCheckCount is the number of checks the task group will attempt to - // register - consulCheckCount int - - // allocUpdates is a listener for retrieving new alloc updates - allocUpdates *cstructs.AllocListener - - // consulClient is used to look up the state of the task's checks - consulClient consulApi.ConsulServiceAPI - - // healthy is used to signal whether we have determined the allocation to be - // healthy or unhealthy - healthy chan bool - - // allocStopped is triggered when the allocation is stopped and tracking is - // not needed - allocStopped chan struct{} - - // l is used to lock shared fields listed below - l sync.Mutex - - // tasksHealthy marks whether all the tasks have met their health check - // (disregards Consul) - tasksHealthy bool - - // allocFailed marks whether the allocation failed - allocFailed bool - - // checksHealthy marks whether all the task's Consul checks are healthy - checksHealthy bool - - // taskHealth contains the health state for each task - taskHealth map[string]*taskHealthState -} - -// newAllocHealthTracker returns a health tracker for the given allocation. An -// alloc listener and consul API object are given so that the watcher can detect -// health changes. -func newAllocHealthTracker(parentCtx context.Context, logger *log.Logger, alloc *structs.Allocation, - allocUpdates *cstructs.AllocListener, consulClient consulApi.ConsulServiceAPI, - minHealthyTime time.Duration, useChecks bool) *allocHealthTracker { - - a := &allocHealthTracker{ - logger: logger, - healthy: make(chan bool, 1), - allocStopped: make(chan struct{}), - alloc: alloc, - tg: alloc.Job.LookupTaskGroup(alloc.TaskGroup), - minHealthyTime: minHealthyTime, - useChecks: useChecks, - allocUpdates: allocUpdates, - consulClient: consulClient, - } - - a.taskHealth = make(map[string]*taskHealthState, len(a.tg.Tasks)) - for _, task := range a.tg.Tasks { - a.taskHealth[task.Name] = &taskHealthState{task: task} - } - - for _, task := range a.tg.Tasks { - for _, s := range task.Services { - a.consulCheckCount += len(s.Checks) - } - } - - a.ctx, a.cancelFn = context.WithCancel(parentCtx) - return a -} - -// Start starts the watcher. -func (a *allocHealthTracker) Start() { - go a.watchTaskEvents() - if a.useChecks { - go a.watchConsulEvents() - } -} - -// HealthyCh returns a channel that will emit a boolean indicating the health of -// the allocation. -func (a *allocHealthTracker) HealthyCh() <-chan bool { - return a.healthy -} - -// AllocStoppedCh returns a channel that will be fired if the allocation is -// stopped. This means that health will not be set. -func (a *allocHealthTracker) AllocStoppedCh() <-chan struct{} { - return a.allocStopped -} - -// TaskEvents returns a map of events by task. This should only be called after -// health has been determined. Only tasks that have contributed to the -// allocation being unhealthy will have an event. -func (a *allocHealthTracker) TaskEvents() map[string]string { - a.l.Lock() - defer a.l.Unlock() - - // Nothing to do since the failure wasn't task related - if a.allocFailed { - return nil - } - - deadline, _ := a.ctx.Deadline() - events := make(map[string]string, len(a.tg.Tasks)) - - // Go through are task information and build the event map - for task, state := range a.taskHealth { - useChecks := a.tg.Update.HealthCheck == structs.UpdateStrategyHealthCheck_Checks - if e, ok := state.event(deadline, a.tg.Update.MinHealthyTime, useChecks); ok { - events[task] = e - } - } - - return events -} - -// setTaskHealth is used to set the tasks health as healthy or unhealthy. If the -// allocation is terminal, health is immediately broadcasted. -func (a *allocHealthTracker) setTaskHealth(healthy, terminal bool) { - a.l.Lock() - defer a.l.Unlock() - a.tasksHealthy = healthy - - // If we are marked healthy but we also require Consul to be healthy and it - // isn't yet, return, unless the task is terminal - requireConsul := a.useChecks && a.consulCheckCount > 0 - if !terminal && healthy && requireConsul && !a.checksHealthy { - return - } - - select { - case a.healthy <- healthy: - default: - } - - // Shutdown the tracker - a.cancelFn() -} - -// setCheckHealth is used to mark the checks as either healthy or unhealthy. -func (a *allocHealthTracker) setCheckHealth(healthy bool) { - a.l.Lock() - defer a.l.Unlock() - a.checksHealthy = healthy - - // Only signal if we are healthy and so is the tasks - if !healthy || !a.tasksHealthy { - return - } - - select { - case a.healthy <- healthy: - default: - } - - // Shutdown the tracker - a.cancelFn() -} - -// markAllocStopped is used to mark the allocation as having stopped. -func (a *allocHealthTracker) markAllocStopped() { - close(a.allocStopped) - a.cancelFn() -} - -// watchTaskEvents is a long lived watcher that watches for the health of the -// allocation's tasks. -func (a *allocHealthTracker) watchTaskEvents() { - alloc := a.alloc - allStartedTime := time.Time{} - healthyTimer := time.NewTimer(0) - if !healthyTimer.Stop() { - select { - case <-healthyTimer.C: - default: - } - } - - for { - // If the alloc is being stopped by the server just exit - switch alloc.DesiredStatus { - case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: - a.logger.Printf("[TRACE] client.alloc_watcher: desired status terminal for alloc %q", alloc.ID) - a.markAllocStopped() - return - } - - // Store the task states - a.l.Lock() - for task, state := range alloc.TaskStates { - a.taskHealth[task].state = state - } - a.l.Unlock() - - // Detect if the alloc is unhealthy or if all tasks have started yet - latestStartTime := time.Time{} - for _, state := range alloc.TaskStates { - // One of the tasks has failed so we can exit watching - if state.Failed || !state.FinishedAt.IsZero() { - a.setTaskHealth(false, true) - return - } - - if state.State != structs.TaskStateRunning { - latestStartTime = time.Time{} - break - } else if state.StartedAt.After(latestStartTime) { - latestStartTime = state.StartedAt - } - } - - // If the alloc is marked as failed by the client but none of the - // individual tasks failed, that means something failed at the alloc - // level. - if alloc.ClientStatus == structs.AllocClientStatusFailed { - a.logger.Printf("[TRACE] client.alloc_watcher: client status failed for alloc %q", alloc.ID) - a.l.Lock() - a.allocFailed = true - a.l.Unlock() - a.setTaskHealth(false, true) - return - } - - if !latestStartTime.Equal(allStartedTime) { - // Avoid the timer from firing at the old start time - if !healthyTimer.Stop() { - select { - case <-healthyTimer.C: - default: - } - } - - // Set the timer since all tasks are started - if !latestStartTime.IsZero() { - allStartedTime = latestStartTime - healthyTimer.Reset(a.minHealthyTime) - } - } - - select { - case <-a.ctx.Done(): - return - case newAlloc, ok := <-a.allocUpdates.Ch: - if !ok { - return - } - alloc = newAlloc - case <-healthyTimer.C: - a.setTaskHealth(true, false) - } - } -} - -// watchConsulEvents iis a long lived watcher that watches for the health of the -// allocation's Consul checks. -func (a *allocHealthTracker) watchConsulEvents() { - // checkTicker is the ticker that triggers us to look at the checks in - // Consul - checkTicker := time.NewTicker(consulCheckLookupInterval) - defer checkTicker.Stop() - - // healthyTimer fires when the checks have been healthy for the - // MinHealthyTime - healthyTimer := time.NewTimer(0) - if !healthyTimer.Stop() { - select { - case <-healthyTimer.C: - default: - } - } - - // primed marks whether the healthy timer has been set - primed := false - - // Store whether the last Consul checks call was successful or not - consulChecksErr := false - - // allocReg are the registered objects in Consul for the allocation - var allocReg *consul.AllocRegistration - -OUTER: - for { - select { - case <-a.ctx.Done(): - return - case <-checkTicker.C: - newAllocReg, err := a.consulClient.AllocRegistrations(a.alloc.ID) - if err != nil { - if !consulChecksErr { - consulChecksErr = true - a.logger.Printf("[WARN] client.alloc_watcher: failed to lookup Consul registrations for allocation %q: %v", a.alloc.ID, err) - } - continue OUTER - } else { - consulChecksErr = false - allocReg = newAllocReg - } - case <-healthyTimer.C: - a.setCheckHealth(true) - } - - if allocReg == nil { - continue - } - - // Store the task registrations - a.l.Lock() - for task, reg := range allocReg.Tasks { - a.taskHealth[task].taskRegistrations = reg - } - a.l.Unlock() - - // Detect if all the checks are passing - passed := true - - CHECKS: - for _, treg := range allocReg.Tasks { - for _, sreg := range treg.Services { - for _, check := range sreg.Checks { - if check.Status == api.HealthPassing { - continue - } - - passed = false - a.setCheckHealth(false) - break CHECKS - } - } - } - - if !passed { - // Reset the timer since we have transitioned back to unhealthy - if primed { - if !healthyTimer.Stop() { - select { - case <-healthyTimer.C: - default: - } - } - primed = false - } - } else if !primed { - // Reset the timer to fire after MinHealthyTime - if !healthyTimer.Stop() { - select { - case <-healthyTimer.C: - default: - } - } - - primed = true - healthyTimer.Reset(a.minHealthyTime) - } - } -} - -// taskHealthState captures all known health information about a task. It is -// largely used to determine if the task has contributed to the allocation being -// unhealthy. -type taskHealthState struct { - task *structs.Task - state *structs.TaskState - taskRegistrations *consul.TaskRegistration -} - -// event takes the deadline time for the allocation to be healthy and the update -// strategy of the group. It returns true if the task has contributed to the -// allocation being unhealthy and if so, an event description of why. -func (t *taskHealthState) event(deadline time.Time, minHealthyTime time.Duration, useChecks bool) (string, bool) { - requireChecks := false - desiredChecks := 0 - for _, s := range t.task.Services { - if nc := len(s.Checks); nc > 0 { - requireChecks = true - desiredChecks += nc - } - } - requireChecks = requireChecks && useChecks - - if t.state != nil { - if t.state.Failed { - return "Unhealthy because of failed task", true - } - if t.state.State != structs.TaskStateRunning { - return "Task not running by deadline", true - } - - // We are running so check if we have been running long enough - if t.state.StartedAt.Add(minHealthyTime).After(deadline) { - return fmt.Sprintf("Task not running for min_healthy_time of %v by deadline", minHealthyTime), true - } - } - - if t.taskRegistrations != nil { - var notPassing []string - passing := 0 - - OUTER: - for _, sreg := range t.taskRegistrations.Services { - for _, check := range sreg.Checks { - if check.Status != api.HealthPassing { - notPassing = append(notPassing, sreg.Service.Service) - continue OUTER - } else { - passing++ - } - } - } - - if len(notPassing) != 0 { - return fmt.Sprintf("Services not healthy by deadline: %s", strings.Join(notPassing, ", ")), true - } - - if passing != desiredChecks { - return fmt.Sprintf("Only %d out of %d checks registered and passing", passing, desiredChecks), true - } - - } else if requireChecks { - return "Service checks not registered", true - } - - return "", false -} diff --git a/client/allocrunnerdeprecated/alloc_runner_test.go b/client/allocrunnerdeprecated/alloc_runner_test.go deleted file mode 100644 index 4cd665ea1740..000000000000 --- a/client/allocrunnerdeprecated/alloc_runner_test.go +++ /dev/null @@ -1,1414 +0,0 @@ -// +build deprecated - -package allocrunner - -import ( - "fmt" - "io/ioutil" - "os" - "path/filepath" - "strings" - "testing" - "time" - - "github.com/boltdb/bolt" - "github.com/hashicorp/consul/api" - "github.com/hashicorp/nomad/command/agent/consul" - "github.com/hashicorp/nomad/helper/testlog" - "github.com/hashicorp/nomad/helper/uuid" - "github.com/hashicorp/nomad/nomad/mock" - "github.com/hashicorp/nomad/nomad/structs" - "github.com/hashicorp/nomad/testutil" - "github.com/stretchr/testify/assert" - - "github.com/hashicorp/nomad/client/allocrunnerdeprecated/taskrunner" - consulApi "github.com/hashicorp/nomad/client/consul" - "github.com/hashicorp/nomad/client/state" - "github.com/stretchr/testify/require" -) - -// allocationBucketExists checks if the allocation bucket was created. -func allocationBucketExists(tx *bolt.Tx, allocID string) bool { - bucket, err := state.GetAllocationBucket(tx, allocID) - return err == nil && bucket != nil -} - -func TestAllocRunner_SimpleRun(t *testing.T) { - t.Parallel() - upd, ar := TestAllocRunner(t, false) - go ar.Run() - defer ar.Destroy() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - if last.ClientStatus != structs.AllocClientStatusComplete { - return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -// Test that FinisheAt is set when the alloc is in a terminal state -func TestAllocRunner_FinishedAtSet(t *testing.T) { - t.Parallel() - require := require.New(t) - _, ar := TestAllocRunner(t, false) - ar.allocClientStatus = structs.AllocClientStatusFailed - alloc := ar.Alloc() - taskFinishedAt := make(map[string]time.Time) - require.NotEmpty(alloc.TaskStates) - for name, s := range alloc.TaskStates { - require.False(s.FinishedAt.IsZero()) - taskFinishedAt[name] = s.FinishedAt - } - - // Verify that calling again should not mutate finishedAt - alloc2 := ar.Alloc() - for name, s := range alloc2.TaskStates { - require.Equal(taskFinishedAt[name], s.FinishedAt) - } - -} - -// Test that FinisheAt is set when the alloc is in a terminal state -func TestAllocRunner_FinishedAtSet_TaskEvents(t *testing.T) { - t.Parallel() - require := require.New(t) - _, ar := TestAllocRunner(t, false) - ar.taskStates[ar.alloc.Job.TaskGroups[0].Tasks[0].Name] = &structs.TaskState{State: structs.TaskStateDead, Failed: true} - - alloc := ar.Alloc() - taskFinishedAt := make(map[string]time.Time) - require.NotEmpty(alloc.TaskStates) - for name, s := range alloc.TaskStates { - require.False(s.FinishedAt.IsZero()) - taskFinishedAt[name] = s.FinishedAt - } - - // Verify that calling again should not mutate finishedAt - alloc2 := ar.Alloc() - for name, s := range alloc2.TaskStates { - require.Equal(taskFinishedAt[name], s.FinishedAt) - } - -} - -// Test that the watcher will mark the allocation as unhealthy. -func TestAllocRunner_DeploymentHealth_Unhealthy_BadStart(t *testing.T) { - t.Parallel() - assert := assert.New(t) - - // Ensure the task fails and restarts - upd, ar := TestAllocRunner(t, true) - - // Make the task fail - task := ar.alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config["start_error"] = "test error" - - // Make the alloc be part of a deployment - ar.alloc.DeploymentID = uuid.Generate() - ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() - ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates - ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1 - - go ar.Run() - defer ar.Destroy() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - if !last.DeploymentStatus.HasHealth() { - return false, fmt.Errorf("want deployment status unhealthy; got unset") - } else if *last.DeploymentStatus.Healthy { - return false, fmt.Errorf("want deployment status unhealthy; got healthy") - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - - // Assert that we have an event explaining why we are unhealthy. - assert.Len(ar.taskStates, 1) - state := ar.taskStates[task.Name] - assert.NotNil(state) - assert.NotEmpty(state.Events) - last := state.Events[len(state.Events)-1] - assert.Equal(allocHealthEventSource, last.Type) - assert.Contains(last.Message, "failed task") -} - -// Test that the watcher will mark the allocation as unhealthy if it hits its -// deadline. -func TestAllocRunner_DeploymentHealth_Unhealthy_Deadline(t *testing.T) { - t.Parallel() - - // Don't restart but force service job type - upd, ar := TestAllocRunner(t, false) - ar.alloc.Job.Type = structs.JobTypeService - - // Make the task block - task := ar.alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "start_block_for": "4s", - "run_for": "10s", - } - - // Make the alloc be part of a deployment - ar.alloc.DeploymentID = uuid.Generate() - ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() - ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates - ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1 - ar.alloc.Job.TaskGroups[0].Update.HealthyDeadline = 100 * time.Millisecond - - go ar.Run() - defer ar.Destroy() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - - // Assert alloc is unhealthy - if !last.DeploymentStatus.HasHealth() { - return false, fmt.Errorf("want deployment status unhealthy; got unset") - } else if *last.DeploymentStatus.Healthy { - return false, fmt.Errorf("want deployment status unhealthy; got healthy") - } - - // Assert there is a task event explaining why we are unhealthy. - state, ok := last.TaskStates[task.Name] - if !ok { - return false, fmt.Errorf("missing state for task %s", task.Name) - } - n := len(state.Events) - if n == 0 { - return false, fmt.Errorf("no task events") - } - lastEvent := state.Events[n-1] - if lastEvent.Type != allocHealthEventSource { - return false, fmt.Errorf("expected %q; found %q", allocHealthEventSource, lastEvent.Type) - } - if !strings.Contains(lastEvent.Message, "not running by deadline") { - return false, fmt.Errorf(`expected "not running by deadline" but found: %s`, lastEvent.Message) - } - - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -// Test that the watcher will mark the allocation as healthy. -func TestAllocRunner_DeploymentHealth_Healthy_NoChecks(t *testing.T) { - t.Parallel() - - // Ensure the task fails and restarts - upd, ar := TestAllocRunner(t, true) - - // Make the task run healthy - task := ar.alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "10s", - } - - // Create a task that takes longer to become healthy - ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task.Copy()) - task2 := ar.alloc.Job.TaskGroups[0].Tasks[1] - task2.Name = "task 2" - task2.Config["start_block_for"] = "500ms" - - // Make the alloc be part of a deployment - ar.alloc.DeploymentID = uuid.Generate() - ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() - ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates - ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1 - ar.alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond - - start := time.Now() - go ar.Run() - defer ar.Destroy() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - if !last.DeploymentStatus.HasHealth() { - return false, fmt.Errorf("want deployment status unhealthy; got unset") - } else if !*last.DeploymentStatus.Healthy { - return false, fmt.Errorf("want deployment status healthy; got unhealthy") - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - if d := time.Now().Sub(start); d < 500*time.Millisecond { - t.Fatalf("didn't wait for second task group. Only took %v", d) - } -} - -// Test that the watcher will mark the allocation as healthy with checks -func TestAllocRunner_DeploymentHealth_Healthy_Checks(t *testing.T) { - t.Parallel() - - // Ensure the task fails and restarts - upd, ar := TestAllocRunner(t, true) - - // Make the task fail - task := ar.alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "10s", - } - - // Create a task that has no checks - ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task.Copy()) - task2 := ar.alloc.Job.TaskGroups[0].Tasks[1] - task2.Name = "task 2" - task2.Services = nil - - // Make the alloc be part of a deployment - ar.alloc.DeploymentID = uuid.Generate() - ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() - ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_Checks - ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1 - ar.alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond - - checkHealthy := &api.AgentCheck{ - CheckID: uuid.Generate(), - Status: api.HealthPassing, - } - checkUnhealthy := &api.AgentCheck{ - CheckID: checkHealthy.CheckID, - Status: api.HealthWarning, - } - - // Only return the check as healthy after a duration - trigger := time.After(500 * time.Millisecond) - ar.consulClient.(*consulApi.MockConsulServiceClient).AllocRegistrationsFn = func(allocID string) (*consul.AllocRegistration, error) { - select { - case <-trigger: - return &consul.AllocRegistration{ - Tasks: map[string]*consul.TaskRegistration{ - task.Name: { - Services: map[string]*consul.ServiceRegistration{ - "123": { - Service: &api.AgentService{Service: "foo"}, - Checks: []*api.AgentCheck{checkHealthy}, - }, - }, - }, - }, - }, nil - default: - return &consul.AllocRegistration{ - Tasks: map[string]*consul.TaskRegistration{ - task.Name: { - Services: map[string]*consul.ServiceRegistration{ - "123": { - Service: &api.AgentService{Service: "foo"}, - Checks: []*api.AgentCheck{checkUnhealthy}, - }, - }, - }, - }, - }, nil - } - } - - start := time.Now() - go ar.Run() - defer ar.Destroy() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - if !last.DeploymentStatus.HasHealth() { - return false, fmt.Errorf("want deployment status unhealthy; got unset") - } else if !*last.DeploymentStatus.Healthy { - return false, fmt.Errorf("want deployment status healthy; got unhealthy") - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - - if d := time.Now().Sub(start); d < 500*time.Millisecond { - t.Fatalf("didn't wait for second task group. Only took %v", d) - } -} - -// Test that the watcher will mark the allocation as unhealthy with failing -// checks -func TestAllocRunner_DeploymentHealth_Unhealthy_Checks(t *testing.T) { - t.Parallel() - assert := assert.New(t) - - // Ensure the task fails and restarts - upd, ar := TestAllocRunner(t, true) - - // Make the task fail - task := ar.alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "10s", - } - - // Make the alloc be part of a deployment - ar.alloc.DeploymentID = uuid.Generate() - ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() - ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_Checks - ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1 - ar.alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond - ar.alloc.Job.TaskGroups[0].Update.HealthyDeadline = 1 * time.Second - - checkUnhealthy := &api.AgentCheck{ - CheckID: uuid.Generate(), - Status: api.HealthWarning, - } - - // Only return the check as healthy after a duration - ar.consulClient.(*consulApi.MockConsulServiceClient).AllocRegistrationsFn = func(allocID string) (*consul.AllocRegistration, error) { - return &consul.AllocRegistration{ - Tasks: map[string]*consul.TaskRegistration{ - task.Name: { - Services: map[string]*consul.ServiceRegistration{ - "123": { - Service: &api.AgentService{Service: "foo"}, - Checks: []*api.AgentCheck{checkUnhealthy}, - }, - }, - }, - }, - }, nil - } - - go ar.Run() - defer ar.Destroy() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - if !last.DeploymentStatus.HasHealth() { - return false, fmt.Errorf("want deployment status unhealthy; got unset") - } else if *last.DeploymentStatus.Healthy { - return false, fmt.Errorf("want deployment status unhealthy; got healthy") - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - - // Assert that we have an event explaining why we are unhealthy. - assert.Len(ar.taskStates, 1) - state := ar.taskStates[task.Name] - assert.NotNil(state) - assert.NotEmpty(state.Events) - last := state.Events[len(state.Events)-1] - assert.Equal(allocHealthEventSource, last.Type) - assert.Contains(last.Message, "Services not healthy by deadline") -} - -// Test that the watcher will mark the allocation as healthy. -func TestAllocRunner_DeploymentHealth_Healthy_UpdatedDeployment(t *testing.T) { - t.Parallel() - - // Ensure the task fails and restarts - upd, ar := TestAllocRunner(t, true) - - // Make the task run healthy - task := ar.alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "30s", - } - - // Make the alloc be part of a deployment - ar.alloc.DeploymentID = uuid.Generate() - ar.alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() - ar.alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_TaskStates - ar.alloc.Job.TaskGroups[0].Update.MaxParallel = 1 - ar.alloc.Job.TaskGroups[0].Update.MinHealthyTime = 100 * time.Millisecond - - go ar.Run() - defer ar.Destroy() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - if !last.DeploymentStatus.HasHealth() { - return false, fmt.Errorf("want deployment status unhealthy; got unset") - } else if !*last.DeploymentStatus.Healthy { - return false, fmt.Errorf("want deployment status healthy; got unhealthy") - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - - // Mimick an update to a new deployment id - last := upd.Last() - last.DeploymentStatus = nil - last.DeploymentID = uuid.Generate() - ar.Update(last) - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if !last.DeploymentStatus.HasHealth() { - return false, fmt.Errorf("want deployment status unhealthy; got unset") - } else if !*last.DeploymentStatus.Healthy { - return false, fmt.Errorf("want deployment status healthy; got unhealthy") - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -// Test that health is reported for services that got migrated; not just part -// of deployments. -func TestAllocRunner_DeploymentHealth_Healthy_Migration(t *testing.T) { - t.Parallel() - - // Ensure the task fails and restarts - upd, ar := TestAllocRunner(t, true) - - // Make the task run healthy - tg := ar.alloc.Job.TaskGroups[0] - task := tg.Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "30s", - } - - // Shorten the default migration healthy time - tg.Migrate = structs.DefaultMigrateStrategy() - tg.Migrate.MinHealthyTime = 100 * time.Millisecond - tg.Migrate.HealthCheck = structs.MigrateStrategyHealthStates - - // Ensure the alloc is *not* part of a deployment - ar.alloc.DeploymentID = "" - - go ar.Run() - defer ar.Destroy() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - if !last.DeploymentStatus.HasHealth() { - return false, fmt.Errorf("want deployment status unhealthy; got unset") - } else if !*last.DeploymentStatus.Healthy { - return false, fmt.Errorf("want deployment status healthy; got unhealthy") - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -// Test that health is *not* reported for batch jobs -func TestAllocRunner_DeploymentHealth_BatchDisabled(t *testing.T) { - t.Parallel() - - // Ensure the task fails and restarts - alloc := mock.BatchAlloc() - tg := alloc.Job.TaskGroups[0] - - // This should not be possile as validation should prevent batch jobs - // from having a migration stanza! - tg.Migrate = structs.DefaultMigrateStrategy() - tg.Migrate.MinHealthyTime = 1 * time.Millisecond - tg.Migrate.HealthyDeadline = 2 * time.Millisecond - tg.Migrate.HealthCheck = structs.MigrateStrategyHealthStates - - task := tg.Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "5s", - } - upd, ar := TestAllocRunnerFromAlloc(t, alloc, false) - - go ar.Run() - defer ar.Destroy() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - if last.DeploymentStatus != nil { - return false, fmt.Errorf("unexpected deployment health set: %v", last.DeploymentStatus.Healthy) - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -// TestAllocRuner_RetryArtifact ensures that if one task in a task group is -// retrying fetching an artifact, other tasks in the group should be able -// to proceed. -func TestAllocRunner_RetryArtifact(t *testing.T) { - t.Parallel() - - alloc := mock.Alloc() - alloc.Job.Type = structs.JobTypeBatch - alloc.Job.TaskGroups[0].RestartPolicy.Mode = structs.RestartPolicyModeFail - alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 1 - alloc.Job.TaskGroups[0].RestartPolicy.Delay = time.Duration(4*testutil.TestMultiplier()) * time.Second - - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "1s", - } - - // Create a new task with a bad artifact - badtask := alloc.Job.TaskGroups[0].Tasks[0].Copy() - badtask.Name = "bad" - badtask.Artifacts = []*structs.TaskArtifact{ - {GetterSource: "http://127.0.0.1:0/foo/bar/baz"}, - } - - alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, badtask) - upd, ar := TestAllocRunnerFromAlloc(t, alloc, true) - go ar.Run() - defer ar.Destroy() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - - // web task should have completed successfully while bad task - // retries artifact fetching - webstate, ok := last.TaskStates["web"] - if !ok { - return false, fmt.Errorf("no task state for web") - } - if webstate.State != structs.TaskStateDead { - return false, fmt.Errorf("expected web to be dead but found %q", last.TaskStates["web"].State) - } - if !webstate.Successful() { - return false, fmt.Errorf("expected web to have exited successfully") - } - - // bad task should have failed - badstate := last.TaskStates["bad"] - if badstate.State != structs.TaskStateDead { - return false, fmt.Errorf("expected bad to be dead but found %q", badstate.State) - } - if !badstate.Failed { - return false, fmt.Errorf("expected bad to have failed: %#v", badstate.Events) - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) { - t.Parallel() - upd, ar := TestAllocRunner(t, false) - - // Ensure task takes some time - task := ar.alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "10s", - } - go ar.Run() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - if last.ClientStatus != structs.AllocClientStatusRunning { - return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning) - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - - // Update the alloc to be terminal which should cause the alloc runner to - // stop the tasks and wait for a destroy. - update := ar.alloc.Copy() - update.DesiredStatus = structs.AllocDesiredStatusStop - ar.Update(update) - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - - // Check the status has changed. - if last.ClientStatus != structs.AllocClientStatusComplete { - return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) - } - - // Check the allocation state still exists - if err := ar.stateDB.View(func(tx *bolt.Tx) error { - if !allocationBucketExists(tx, ar.Alloc().ID) { - return fmt.Errorf("no bucket for alloc") - } - - return nil - }); err != nil { - return false, fmt.Errorf("state destroyed") - } - - // Check the alloc directory still exists - if _, err := os.Stat(ar.allocDir.AllocDir); err != nil { - return false, fmt.Errorf("alloc dir destroyed: %v", ar.allocDir.AllocDir) - } - - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - - // Send the destroy signal and ensure the AllocRunner cleans up. - ar.Destroy() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - - // Check the status has changed. - if last.ClientStatus != structs.AllocClientStatusComplete { - return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) - } - - // Check the state was cleaned - if err := ar.stateDB.View(func(tx *bolt.Tx) error { - if allocationBucketExists(tx, ar.Alloc().ID) { - return fmt.Errorf("bucket for alloc exists") - } - - return nil - }); err != nil { - return false, fmt.Errorf("state not destroyed") - } - - // Check the alloc directory was cleaned - if _, err := os.Stat(ar.allocDir.AllocDir); err == nil { - return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir) - } else if !os.IsNotExist(err) { - return false, fmt.Errorf("stat err: %v", err) - } - - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -func TestAllocRunner_Destroy(t *testing.T) { - t.Parallel() - upd, ar := TestAllocRunner(t, false) - - // Ensure task takes some time - task := ar.alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "10s", - } - go ar.Run() - start := time.Now() - - // Begin the tear down - go func() { - time.Sleep(1 * time.Second) - ar.Destroy() - }() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - - // Check the status has changed. - if last.ClientStatus != structs.AllocClientStatusComplete { - return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) - } - - // Check the state was cleaned - if err := ar.stateDB.View(func(tx *bolt.Tx) error { - if allocationBucketExists(tx, ar.Alloc().ID) { - return fmt.Errorf("bucket for alloc exists") - } - - return nil - }); err != nil { - return false, fmt.Errorf("state not destroyed: %v", err) - } - - // Check the alloc directory was cleaned - if _, err := os.Stat(ar.allocDir.AllocDir); err == nil { - return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir) - } else if !os.IsNotExist(err) { - return false, fmt.Errorf("stat err: %v", err) - } - - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - - if elapsed := time.Since(start); elapsed > 20*time.Second { - t.Fatalf("took too long to terminate: %s", elapsed) - } -} - -func TestAllocRunner_Update(t *testing.T) { - t.Parallel() - _, ar := TestAllocRunner(t, false) - - // Deep copy the alloc to avoid races when updating - newAlloc := ar.Alloc().Copy() - - // Ensure task takes some time - task := ar.alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "10s", - } - go ar.Run() - defer ar.Destroy() - - // Update the alloc definition - newAlloc.Name = "FOO" - newAlloc.AllocModifyIndex++ - ar.Update(newAlloc) - - // Check the alloc runner stores the update allocation. - testutil.WaitForResult(func() (bool, error) { - return ar.Alloc().Name == "FOO", nil - }, func(err error) { - t.Fatalf("err: %v %#v", err, ar.Alloc()) - }) -} - -func TestAllocRunner_SaveRestoreState(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "10s", - } - - upd, ar := TestAllocRunnerFromAlloc(t, alloc, false) - go ar.Run() - defer ar.Destroy() - - // Snapshot state - testutil.WaitForResult(func() (bool, error) { - ar.taskLock.RLock() - defer ar.taskLock.RUnlock() - return len(ar.tasks) == 1, nil - }, func(err error) { - t.Fatalf("task never started: %v", err) - }) - - err := ar.SaveState() - if err != nil { - t.Fatalf("err: %v", err) - } - - // Create a new alloc runner - l2 := testlog.WithPrefix(t, "----- ar2: ") - alloc2 := &structs.Allocation{ID: ar.alloc.ID} - prevAlloc := NewAllocWatcher(alloc2, ar, nil, ar.config, l2, "") - ar2 := NewAllocRunner(l2, ar.config, ar.stateDB, upd.Update, - alloc2, ar.vaultClient, ar.consulClient, prevAlloc) - err = ar2.RestoreState() - if err != nil { - t.Fatalf("err: %v", err) - } - go ar2.Run() - - testutil.WaitForResult(func() (bool, error) { - if len(ar2.tasks) != 1 { - return false, fmt.Errorf("Incorrect number of tasks") - } - - last := upd.Last() - if last == nil { - return false, nil - } - - return last.ClientStatus == structs.AllocClientStatusRunning, nil - }, func(err error) { - last := upd.Last() - t.Fatalf("err: %v %#v %#v", err, last, last.TaskStates["web"]) - }) - - // Destroy and wait - ar2.Destroy() - start := time.Now() - - testutil.WaitForResult(func() (bool, error) { - alloc := ar2.Alloc() - if alloc.ClientStatus != structs.AllocClientStatusComplete { - return false, fmt.Errorf("Bad client status; got %v; want %v", alloc.ClientStatus, structs.AllocClientStatusComplete) - } - return true, nil - }, func(err error) { - last := upd.Last() - t.Fatalf("err: %v %#v %#v", err, last, last.TaskStates) - }) - - if time.Since(start) > time.Duration(testutil.TestMultiplier()*5)*time.Second { - t.Fatalf("took too long to terminate") - } -} - -func TestAllocRunner_SaveRestoreState_TerminalAlloc(t *testing.T) { - t.Parallel() - upd, ar := TestAllocRunner(t, false) - ar.logger = testlog.WithPrefix(t, "ar1: ") - - // Ensure task takes some time - ar.alloc.Job.TaskGroups[0].Tasks[0].Driver = "mock_driver" - task := ar.alloc.Job.TaskGroups[0].Tasks[0] - task.Config = map[string]interface{}{ - "run_for": "10s", - } - go ar.Run() - defer ar.Destroy() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - - if last.ClientStatus != structs.AllocClientStatusRunning { - return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusRunning) - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - - // Update the alloc to be terminal which should cause the alloc runner to - // stop the tasks and wait for a destroy. - update := ar.alloc.Copy() - update.DesiredStatus = structs.AllocDesiredStatusStop - ar.Update(update) - - testutil.WaitForResult(func() (bool, error) { - return ar.Alloc().DesiredStatus == structs.AllocDesiredStatusStop, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - - err := ar.SaveState() - if err != nil { - t.Fatalf("err: %v", err) - } - - // Ensure ar1 doesn't recreate the state file - ar.allocLock.Lock() - defer ar.allocLock.Unlock() - - // Create a new alloc runner - l2 := testlog.WithPrefix(t, "ar2: ") - alloc2 := &structs.Allocation{ID: ar.alloc.ID} - prevAlloc := NewAllocWatcher(alloc2, ar, nil, ar.config, l2, "") - ar2 := NewAllocRunner(l2, ar.config, ar.stateDB, upd.Update, - alloc2, ar.vaultClient, ar.consulClient, prevAlloc) - err = ar2.RestoreState() - if err != nil { - t.Fatalf("err: %v", err) - } - ar2.logger.Println("[TESTING] running second alloc runner") - go ar2.Run() - defer ar2.Destroy() // Just-in-case of failure before Destroy below - - testutil.WaitForResult(func() (bool, error) { - // Check the state still exists - if err := ar.stateDB.View(func(tx *bolt.Tx) error { - if !allocationBucketExists(tx, ar2.Alloc().ID) { - return fmt.Errorf("no bucket for alloc") - } - - return nil - }); err != nil { - return false, fmt.Errorf("state destroyed") - } - - // Check the alloc directory still exists - if _, err := os.Stat(ar.allocDir.AllocDir); err != nil { - return false, fmt.Errorf("alloc dir destroyed: %v", ar.allocDir.AllocDir) - } - - return true, nil - }, func(err error) { - last := upd.Last() - t.Fatalf("err: %v %#v %#v", err, last, last.TaskStates) - }) - - // Send the destroy signal and ensure the AllocRunner cleans up. - ar2.logger.Println("[TESTING] destroying second alloc runner") - ar2.Destroy() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - - // Check the status has changed. - if last.ClientStatus != structs.AllocClientStatusComplete { - return false, fmt.Errorf("got client status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) - } - - // Check the state was cleaned - if err := ar.stateDB.View(func(tx *bolt.Tx) error { - if allocationBucketExists(tx, ar2.Alloc().ID) { - return fmt.Errorf("bucket for alloc exists") - } - - return nil - }); err != nil { - return false, fmt.Errorf("state not destroyed") - } - - // Check the alloc directory was cleaned - if _, err := os.Stat(ar.allocDir.AllocDir); err == nil { - return false, fmt.Errorf("alloc dir still exists: %v", ar.allocDir.AllocDir) - } else if !os.IsNotExist(err) { - return false, fmt.Errorf("stat err: %v", err) - } - - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -func TestAllocRunner_TaskFailed_KillTG(t *testing.T) { - t.Parallel() - upd, ar := TestAllocRunner(t, false) - - // Create two tasks in the task group - task := ar.alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.KillTimeout = 10 * time.Millisecond - task.Config = map[string]interface{}{ - "run_for": "10s", - } - - task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy() - task2.Name = "task 2" - task2.Driver = "mock_driver" - task2.Config = map[string]interface{}{ - "start_error": "fail task please", - } - ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2) - ar.alloc.AllocatedResources.Tasks[task2.Name] = ar.alloc.AllocatedResources.Tasks[task.Name].Copy() - go ar.Run() - defer ar.Destroy() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - if last.ClientStatus != structs.AllocClientStatusFailed { - return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusFailed) - } - - // Task One should be killed - state1 := last.TaskStates[task.Name] - if state1.State != structs.TaskStateDead { - return false, fmt.Errorf("got state %v; want %v", state1.State, structs.TaskStateDead) - } - if len(state1.Events) < 2 { - // At least have a received and destroyed - return false, fmt.Errorf("Unexpected number of events") - } - - found := false - for _, e := range state1.Events { - if e.Type != structs.TaskSiblingFailed { - found = true - } - } - - if !found { - return false, fmt.Errorf("Did not find event %v", structs.TaskSiblingFailed) - } - - // Task Two should be failed - state2 := last.TaskStates[task2.Name] - if state2.State != structs.TaskStateDead { - return false, fmt.Errorf("got state %v; want %v", state2.State, structs.TaskStateDead) - } - if !state2.Failed { - return false, fmt.Errorf("task2 should have failed") - } - - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -func TestAllocRunner_TaskLeader_KillTG(t *testing.T) { - t.Parallel() - upd, ar := TestAllocRunner(t, false) - - // Create two tasks in the task group - task := ar.alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.KillTimeout = 10 * time.Millisecond - task.Config = map[string]interface{}{ - "run_for": "10s", - } - - task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy() - task2.Name = "task 2" - task2.Driver = "mock_driver" - task2.Leader = true - task2.Config = map[string]interface{}{ - "run_for": "1s", - } - ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2) - ar.alloc.AllocatedResources.Tasks[task2.Name] = ar.alloc.AllocatedResources.Tasks[task.Name].Copy() - go ar.Run() - defer ar.Destroy() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - if last.ClientStatus != structs.AllocClientStatusComplete { - return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) - } - - // Task One should be killed - state1 := last.TaskStates[task.Name] - if state1.State != structs.TaskStateDead { - return false, fmt.Errorf("got state %v; want %v", state1.State, structs.TaskStateDead) - } - if state1.FinishedAt.IsZero() || state1.StartedAt.IsZero() { - return false, fmt.Errorf("expected to have a start and finish time") - } - if len(state1.Events) < 2 { - // At least have a received and destroyed - return false, fmt.Errorf("Unexpected number of events") - } - - found := false - for _, e := range state1.Events { - if e.Type != structs.TaskLeaderDead { - found = true - } - } - - if !found { - return false, fmt.Errorf("Did not find event %v", structs.TaskLeaderDead) - } - - // Task Two should be dead - state2 := last.TaskStates[task2.Name] - if state2.State != structs.TaskStateDead { - return false, fmt.Errorf("got state %v; want %v", state2.State, structs.TaskStateDead) - } - if state2.FinishedAt.IsZero() || state2.StartedAt.IsZero() { - return false, fmt.Errorf("expected to have a start and finish time") - } - - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -// TestAllocRunner_TaskLeader_StopTG asserts that when stopping a task group -// with a leader the leader is stopped before other tasks. -func TestAllocRunner_TaskLeader_StopTG(t *testing.T) { - t.Parallel() - upd, ar := TestAllocRunner(t, false) - - // Create 3 tasks in the task group - task := ar.alloc.Job.TaskGroups[0].Tasks[0] - task.Name = "follower1" - task.Driver = "mock_driver" - task.KillTimeout = 10 * time.Millisecond - task.Config = map[string]interface{}{ - "run_for": "10s", - } - - task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy() - task2.Name = "leader" - task2.Driver = "mock_driver" - task2.Leader = true - task2.KillTimeout = 10 * time.Millisecond - task2.Config = map[string]interface{}{ - "run_for": "10s", - } - - task3 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy() - task3.Name = "follower2" - task3.Driver = "mock_driver" - task3.KillTimeout = 10 * time.Millisecond - task3.Config = map[string]interface{}{ - "run_for": "10s", - } - ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2, task3) - ar.alloc.AllocatedResources.Tasks[task.Name] = ar.alloc.AllocatedResources.Tasks["web"].Copy() - ar.alloc.AllocatedResources.Tasks[task2.Name] = ar.alloc.AllocatedResources.Tasks[task.Name].Copy() - ar.alloc.AllocatedResources.Tasks[task3.Name] = ar.alloc.AllocatedResources.Tasks[task.Name].Copy() - defer ar.Destroy() - - go ar.Run() - - // Wait for tasks to start - last := upd.Last() - testutil.WaitForResult(func() (bool, error) { - last = upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - if n := len(last.TaskStates); n != 3 { - return false, fmt.Errorf("Not enough task states (want: 3; found %d)", n) - } - for name, state := range last.TaskStates { - if state.State != structs.TaskStateRunning { - return false, fmt.Errorf("Task %q is not running yet (it's %q)", name, state.State) - } - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - - // Reset updates - upd.mu.Lock() - upd.Allocs = upd.Allocs[:0] - upd.mu.Unlock() - - // Stop alloc - update := ar.Alloc() - update.DesiredStatus = structs.AllocDesiredStatusStop - ar.Update(update) - - // Wait for tasks to stop - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower1"].FinishedAt.UnixNano() { - return false, fmt.Errorf("expected leader to finish before follower1: %s >= %s", - last.TaskStates["leader"].FinishedAt, last.TaskStates["follower1"].FinishedAt) - } - if last.TaskStates["leader"].FinishedAt.UnixNano() >= last.TaskStates["follower2"].FinishedAt.UnixNano() { - return false, fmt.Errorf("expected leader to finish before follower2: %s >= %s", - last.TaskStates["leader"].FinishedAt, last.TaskStates["follower2"].FinishedAt) - } - return true, nil - }, func(err error) { - last := upd.Last() - for name, state := range last.TaskStates { - t.Logf("%s: %s", name, state.State) - } - t.Fatalf("err: %v", err) - }) -} - -// TestAllocRunner_TaskLeader_StopRestoredTG asserts that when stopping a -// restored task group with a leader that failed before restoring the leader is -// not stopped as it does not exist. -// See https://github.com/hashicorp/nomad/issues/3420#issuecomment-341666932 -func TestAllocRunner_TaskLeader_StopRestoredTG(t *testing.T) { - t.Skip("Skipping because the functionality being tested doesn't exist") - t.Parallel() - _, ar := TestAllocRunner(t, false) - defer ar.Destroy() - - // Create a leader and follower task in the task group - task := ar.alloc.Job.TaskGroups[0].Tasks[0] - task.Name = "follower1" - task.Driver = "mock_driver" - task.KillTimeout = 10 * time.Second - task.Config = map[string]interface{}{ - "run_for": "10s", - } - - task2 := ar.alloc.Job.TaskGroups[0].Tasks[0].Copy() - task2.Name = "leader" - task2.Driver = "mock_driver" - task2.Leader = true - task2.KillTimeout = 10 * time.Millisecond - task2.Config = map[string]interface{}{ - "run_for": "0s", - } - - ar.alloc.Job.TaskGroups[0].Tasks = append(ar.alloc.Job.TaskGroups[0].Tasks, task2) - ar.alloc.AllocatedResources.Tasks[task.Name] = ar.alloc.AllocatedResources.Tasks["web"].Copy() - ar.alloc.AllocatedResources.Tasks[task2.Name] = ar.alloc.AllocatedResources.Tasks[task.Name].Copy() - - // Mimic Nomad exiting before the leader stopping is able to stop other tasks. - ar.tasks = map[string]*taskrunner.TaskRunner{ - "leader": taskrunner.NewTaskRunner(ar.logger, ar.config, ar.stateDB, ar.setTaskState, - ar.allocDir.NewTaskDir(task2.Name), ar.Alloc(), task2.Copy(), - ar.vaultClient, ar.consulClient), - "follower1": taskrunner.NewTaskRunner(ar.logger, ar.config, ar.stateDB, ar.setTaskState, - ar.allocDir.NewTaskDir(task.Name), ar.Alloc(), task.Copy(), - ar.vaultClient, ar.consulClient), - } - ar.taskStates = map[string]*structs.TaskState{ - "leader": {State: structs.TaskStateDead}, - "follower1": {State: structs.TaskStateRunning}, - } - if err := ar.SaveState(); err != nil { - t.Fatalf("error saving state: %v", err) - } - - // Create a new AllocRunner to test RestoreState and Run - upd2 := &MockAllocStateUpdater{} - ar2 := NewAllocRunner(ar.logger, ar.config, ar.stateDB, upd2.Update, ar.alloc, - ar.vaultClient, ar.consulClient, ar.prevAlloc) - defer ar2.Destroy() - - if err := ar2.RestoreState(); err != nil { - t.Fatalf("error restoring state: %v", err) - } - go ar2.Run() - - // Wait for tasks to be stopped because leader is dead - testutil.WaitForResult(func() (bool, error) { - alloc := ar2.Alloc() - for task, state := range alloc.TaskStates { - if state.State != structs.TaskStateDead { - return false, fmt.Errorf("Task %q should be dead: %v", task, state.State) - } - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - - // Make sure it GCs properly - ar2.Destroy() - - select { - case <-ar2.WaitCh(): - // exited as expected - case <-time.After(10 * time.Second): - t.Fatalf("timed out waiting for AR to GC") - } -} - -// TestAllocRunner_MoveAllocDir asserts that a file written to an alloc's -// local/ dir will be moved to a replacement alloc's local/ dir if sticky -// volumes is on. -func TestAllocRunner_MoveAllocDir(t *testing.T) { - t.Parallel() - // Create an alloc runner - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "1s", - } - upd, ar := TestAllocRunnerFromAlloc(t, alloc, false) - go ar.Run() - defer ar.Destroy() - - testutil.WaitForResult(func() (bool, error) { - last := upd.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - if last.ClientStatus != structs.AllocClientStatusComplete { - return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - - // Write some data in data dir and task dir of the alloc - dataFile := filepath.Join(ar.allocDir.SharedDir, "data", "data_file") - ioutil.WriteFile(dataFile, []byte("hello world"), os.ModePerm) - taskDir := ar.allocDir.TaskDirs[task.Name] - taskLocalFile := filepath.Join(taskDir.LocalDir, "local_file") - ioutil.WriteFile(taskLocalFile, []byte("good bye world"), os.ModePerm) - - // Create another alloc runner - alloc2 := mock.Alloc() - alloc2.PreviousAllocation = ar.allocID - alloc2.Job.TaskGroups[0].EphemeralDisk.Sticky = true - task = alloc2.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "1s", - } - upd2, ar2 := TestAllocRunnerFromAlloc(t, alloc2, false) - - // Set prevAlloc like Client does - ar2.prevAlloc = NewAllocWatcher(alloc2, ar, nil, ar2.config, ar2.logger, "") - - go ar2.Run() - defer ar2.Destroy() - - testutil.WaitForResult(func() (bool, error) { - last := upd2.Last() - if last == nil { - return false, fmt.Errorf("No updates") - } - if last.ClientStatus != structs.AllocClientStatusComplete { - return false, fmt.Errorf("got status %v; want %v", last.ClientStatus, structs.AllocClientStatusComplete) - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - - // Ensure that data from ar was moved to ar2 - taskDir = ar2.allocDir.TaskDirs[task.Name] - taskLocalFile = filepath.Join(taskDir.LocalDir, "local_file") - if fileInfo, _ := os.Stat(taskLocalFile); fileInfo == nil { - t.Fatalf("file %v not found", taskLocalFile) - } - - dataFile = filepath.Join(ar2.allocDir.SharedDir, "data", "data_file") - if fileInfo, _ := os.Stat(dataFile); fileInfo == nil { - t.Fatalf("file %v not found", dataFile) - } -} diff --git a/client/allocrunnerdeprecated/taskrunner/consul_template.go b/client/allocrunnerdeprecated/taskrunner/consul_template.go deleted file mode 100644 index 4bdcb16e996c..000000000000 --- a/client/allocrunnerdeprecated/taskrunner/consul_template.go +++ /dev/null @@ -1,690 +0,0 @@ -// +build deprecated - -package taskrunner - -import ( - "fmt" - "math/rand" - "os" - "path/filepath" - "sort" - "strconv" - "strings" - "sync" - "time" - - ctconf "github.com/hashicorp/consul-template/config" - "github.com/hashicorp/consul-template/manager" - "github.com/hashicorp/consul-template/signals" - envparse "github.com/hashicorp/go-envparse" - multierror "github.com/hashicorp/go-multierror" - "github.com/hashicorp/nomad/client/config" - "github.com/hashicorp/nomad/client/taskenv" - "github.com/hashicorp/nomad/helper" - "github.com/hashicorp/nomad/nomad/structs" -) - -const ( - // consulTemplateSourceName is the source name when using the TaskHooks. - consulTemplateSourceName = "Template" - - // hostSrcOption is the Client option that determines whether the template - // source may be from the host - hostSrcOption = "template.allow_host_source" - - // missingDepEventLimit is the number of missing dependencies that will be - // logged before we switch to showing just the number of missing - // dependencies. - missingDepEventLimit = 3 - - // DefaultMaxTemplateEventRate is the default maximum rate at which a - // template event should be fired. - DefaultMaxTemplateEventRate = 3 * time.Second -) - -// TaskHooks is an interface which provides hooks into the tasks life-cycle -type TaskHooks interface { - // Restart is used to restart the task - Restart(source, reason string, failure bool) - - // Signal is used to signal the task - Signal(source, reason string, s os.Signal) error - - // UnblockStart is used to unblock the starting of the task. This should be - // called after prestart work is completed - UnblockStart(source string) - - // Kill is used to kill the task because of the passed error. If fail is set - // to true, the task is marked as failed - Kill(source, reason string, fail bool) - - // EmitEvent is used to emit an event to be stored in the tasks events. - EmitEvent(source, message string) -} - -// TaskTemplateManager is used to run a set of templates for a given task -type TaskTemplateManager struct { - // config holds the template managers configuration - config *TaskTemplateManagerConfig - - // lookup allows looking up the set of Nomad templates by their consul-template ID - lookup map[string][]*structs.Template - - // runner is the consul-template runner - runner *manager.Runner - - // signals is a lookup map from the string representation of a signal to its - // actual signal - signals map[string]os.Signal - - // shutdownCh is used to signal and started goroutine to shutdown - shutdownCh chan struct{} - - // shutdown marks whether the manager has been shutdown - shutdown bool - shutdownLock sync.Mutex -} - -// TaskTemplateManagerConfig is used to configure an instance of the -// TaskTemplateManager -type TaskTemplateManagerConfig struct { - // Hooks is used to interact with the task the template manager is being run - // for - Hooks TaskHooks - - // Templates is the set of templates we are managing - Templates []*structs.Template - - // ClientConfig is the Nomad Client configuration - ClientConfig *config.Config - - // VaultToken is the Vault token for the task. - VaultToken string - - // TaskDir is the task's directory - TaskDir string - - // EnvBuilder is the environment variable builder for the task. - EnvBuilder *taskenv.Builder - - // MaxTemplateEventRate is the maximum rate at which we should emit events. - MaxTemplateEventRate time.Duration - - // retryRate is only used for testing and is used to increase the retry rate - retryRate time.Duration -} - -// Validate validates the configuration. -func (c *TaskTemplateManagerConfig) Validate() error { - if c == nil { - return fmt.Errorf("Nil config passed") - } else if c.Hooks == nil { - return fmt.Errorf("Invalid task hooks given") - } else if c.ClientConfig == nil { - return fmt.Errorf("Invalid client config given") - } else if c.TaskDir == "" { - return fmt.Errorf("Invalid task directory given") - } else if c.EnvBuilder == nil { - return fmt.Errorf("Invalid task environment given") - } else if c.MaxTemplateEventRate == 0 { - return fmt.Errorf("Invalid max template event rate given") - } - - return nil -} - -func NewTaskTemplateManager(config *TaskTemplateManagerConfig) (*TaskTemplateManager, error) { - // Check pre-conditions - if err := config.Validate(); err != nil { - return nil, err - } - - tm := &TaskTemplateManager{ - config: config, - shutdownCh: make(chan struct{}), - } - - // Parse the signals that we need - for _, tmpl := range config.Templates { - if tmpl.ChangeSignal == "" { - continue - } - - sig, err := signals.Parse(tmpl.ChangeSignal) - if err != nil { - return nil, fmt.Errorf("Failed to parse signal %q", tmpl.ChangeSignal) - } - - if tm.signals == nil { - tm.signals = make(map[string]os.Signal) - } - - tm.signals[tmpl.ChangeSignal] = sig - } - - // Build the consul-template runner - runner, lookup, err := templateRunner(config) - if err != nil { - return nil, err - } - tm.runner = runner - tm.lookup = lookup - - go tm.run() - return tm, nil -} - -// Stop is used to stop the consul-template runner -func (tm *TaskTemplateManager) Stop() { - tm.shutdownLock.Lock() - defer tm.shutdownLock.Unlock() - - if tm.shutdown { - return - } - - close(tm.shutdownCh) - tm.shutdown = true - - // Stop the consul-template runner - if tm.runner != nil { - tm.runner.Stop() - } -} - -// run is the long lived loop that handles errors and templates being rendered -func (tm *TaskTemplateManager) run() { - // Runner is nil if there is no templates - if tm.runner == nil { - // Unblock the start if there is nothing to do - tm.config.Hooks.UnblockStart(consulTemplateSourceName) - return - } - - // Start the runner - go tm.runner.Start() - - // Block till all the templates have been rendered - tm.handleFirstRender() - - // Detect if there was a shutdown. - select { - case <-tm.shutdownCh: - return - default: - } - - // Read environment variables from env templates before we unblock - envMap, err := loadTemplateEnv(tm.config.Templates, tm.config.TaskDir) - if err != nil { - tm.config.Hooks.Kill(consulTemplateSourceName, err.Error(), true) - return - } - tm.config.EnvBuilder.SetTemplateEnv(envMap) - - // Unblock the task - tm.config.Hooks.UnblockStart(consulTemplateSourceName) - - // If all our templates are change mode no-op, then we can exit here - if tm.allTemplatesNoop() { - return - } - - // handle all subsequent render events. - tm.handleTemplateRerenders(time.Now()) -} - -// handleFirstRender blocks till all templates have been rendered -func (tm *TaskTemplateManager) handleFirstRender() { - // missingDependencies is the set of missing dependencies. - var missingDependencies map[string]struct{} - - // eventTimer is used to trigger the firing of an event showing the missing - // dependencies. - eventTimer := time.NewTimer(tm.config.MaxTemplateEventRate) - if !eventTimer.Stop() { - <-eventTimer.C - } - - // outstandingEvent tracks whether there is an outstanding event that should - // be fired. - outstandingEvent := false - - // Wait till all the templates have been rendered -WAIT: - for { - select { - case <-tm.shutdownCh: - return - case err, ok := <-tm.runner.ErrCh: - if !ok { - continue - } - - tm.config.Hooks.Kill(consulTemplateSourceName, err.Error(), true) - case <-tm.runner.TemplateRenderedCh(): - // A template has been rendered, figure out what to do - events := tm.runner.RenderEvents() - - // Not all templates have been rendered yet - if len(events) < len(tm.lookup) { - continue - } - - for _, event := range events { - // This template hasn't been rendered - if event.LastWouldRender.IsZero() { - continue WAIT - } - } - - break WAIT - case <-tm.runner.RenderEventCh(): - events := tm.runner.RenderEvents() - joinedSet := make(map[string]struct{}) - for _, event := range events { - missing := event.MissingDeps - if missing == nil { - continue - } - - for _, dep := range missing.List() { - joinedSet[dep.String()] = struct{}{} - } - } - - // Check to see if the new joined set is the same as the old - different := len(joinedSet) != len(missingDependencies) - if !different { - for k := range joinedSet { - if _, ok := missingDependencies[k]; !ok { - different = true - break - } - } - } - - // Nothing to do - if !different { - continue - } - - // Update the missing set - missingDependencies = joinedSet - - // Update the event timer channel - if !outstandingEvent { - // We got new data so reset - outstandingEvent = true - eventTimer.Reset(tm.config.MaxTemplateEventRate) - } - case <-eventTimer.C: - if missingDependencies == nil { - continue - } - - // Clear the outstanding event - outstandingEvent = false - - // Build the missing set - missingSlice := make([]string, 0, len(missingDependencies)) - for k := range missingDependencies { - missingSlice = append(missingSlice, k) - } - sort.Strings(missingSlice) - - if l := len(missingSlice); l > missingDepEventLimit { - missingSlice[missingDepEventLimit] = fmt.Sprintf("and %d more", l-missingDepEventLimit) - missingSlice = missingSlice[:missingDepEventLimit+1] - } - - missingStr := strings.Join(missingSlice, ", ") - tm.config.Hooks.EmitEvent(consulTemplateSourceName, fmt.Sprintf("Missing: %s", missingStr)) - } - } -} - -// handleTemplateRerenders is used to handle template render events after they -// have all rendered. It takes action based on which set of templates re-render. -// The passed allRenderedTime is the time at which all templates have rendered. -// This is used to avoid signaling the task for any render event before hand. -func (tm *TaskTemplateManager) handleTemplateRerenders(allRenderedTime time.Time) { - // A lookup for the last time the template was handled - handledRenders := make(map[string]time.Time, len(tm.config.Templates)) - - for { - select { - case <-tm.shutdownCh: - return - case err, ok := <-tm.runner.ErrCh: - if !ok { - continue - } - - tm.config.Hooks.Kill(consulTemplateSourceName, err.Error(), true) - case <-tm.runner.TemplateRenderedCh(): - // A template has been rendered, figure out what to do - var handling []string - signals := make(map[string]struct{}) - restart := false - var splay time.Duration - - events := tm.runner.RenderEvents() - for id, event := range events { - - // First time through - if allRenderedTime.After(event.LastDidRender) || allRenderedTime.Equal(event.LastDidRender) { - handledRenders[id] = allRenderedTime - continue - } - - // We have already handled this one - if htime := handledRenders[id]; htime.After(event.LastDidRender) || htime.Equal(event.LastDidRender) { - continue - } - - // Lookup the template and determine what to do - tmpls, ok := tm.lookup[id] - if !ok { - tm.config.Hooks.Kill(consulTemplateSourceName, fmt.Sprintf("template runner returned unknown template id %q", id), true) - return - } - - // Read environment variables from templates - envMap, err := loadTemplateEnv(tm.config.Templates, tm.config.TaskDir) - if err != nil { - tm.config.Hooks.Kill(consulTemplateSourceName, err.Error(), true) - return - } - tm.config.EnvBuilder.SetTemplateEnv(envMap) - - for _, tmpl := range tmpls { - switch tmpl.ChangeMode { - case structs.TemplateChangeModeSignal: - signals[tmpl.ChangeSignal] = struct{}{} - case structs.TemplateChangeModeRestart: - restart = true - case structs.TemplateChangeModeNoop: - continue - } - - if tmpl.Splay > splay { - splay = tmpl.Splay - } - } - - handling = append(handling, id) - } - - if restart || len(signals) != 0 { - if splay != 0 { - ns := splay.Nanoseconds() - offset := rand.Int63n(ns) - t := time.Duration(offset) - - select { - case <-time.After(t): - case <-tm.shutdownCh: - return - } - } - - // Update handle time - for _, id := range handling { - handledRenders[id] = events[id].LastDidRender - } - - if restart { - const failure = false - tm.config.Hooks.Restart(consulTemplateSourceName, "template with change_mode restart re-rendered", failure) - } else if len(signals) != 0 { - var mErr multierror.Error - for signal := range signals { - err := tm.config.Hooks.Signal(consulTemplateSourceName, "template re-rendered", tm.signals[signal]) - if err != nil { - multierror.Append(&mErr, err) - } - } - - if err := mErr.ErrorOrNil(); err != nil { - flat := make([]os.Signal, 0, len(signals)) - for signal := range signals { - flat = append(flat, tm.signals[signal]) - } - tm.config.Hooks.Kill(consulTemplateSourceName, fmt.Sprintf("Sending signals %v failed: %v", flat, err), true) - } - } - } - } - } -} - -// allTemplatesNoop returns whether all the managed templates have change mode noop. -func (tm *TaskTemplateManager) allTemplatesNoop() bool { - for _, tmpl := range tm.config.Templates { - if tmpl.ChangeMode != structs.TemplateChangeModeNoop { - return false - } - } - - return true -} - -// templateRunner returns a consul-template runner for the given templates and a -// lookup by destination to the template. If no templates are in the config, a -// nil template runner and lookup is returned. -func templateRunner(config *TaskTemplateManagerConfig) ( - *manager.Runner, map[string][]*structs.Template, error) { - - if len(config.Templates) == 0 { - return nil, nil, nil - } - - // Parse the templates - ctmplMapping, err := parseTemplateConfigs(config) - if err != nil { - return nil, nil, err - } - - // Create the runner configuration. - runnerConfig, err := newRunnerConfig(config, ctmplMapping) - if err != nil { - return nil, nil, err - } - - runner, err := manager.NewRunner(runnerConfig, false, false) - if err != nil { - return nil, nil, err - } - - // Set Nomad's environment variables - runner.Env = config.EnvBuilder.Build().All() - - // Build the lookup - idMap := runner.TemplateConfigMapping() - lookup := make(map[string][]*structs.Template, len(idMap)) - for id, ctmpls := range idMap { - for _, ctmpl := range ctmpls { - templates := lookup[id] - templates = append(templates, ctmplMapping[ctmpl]) - lookup[id] = templates - } - } - - return runner, lookup, nil -} - -// parseTemplateConfigs converts the tasks templates in the config into -// consul-templates -func parseTemplateConfigs(config *TaskTemplateManagerConfig) (map[ctconf.TemplateConfig]*structs.Template, error) { - allowAbs := config.ClientConfig.ReadBoolDefault(hostSrcOption, true) - taskEnv := config.EnvBuilder.Build() - - ctmpls := make(map[ctconf.TemplateConfig]*structs.Template, len(config.Templates)) - for _, tmpl := range config.Templates { - var src, dest string - if tmpl.SourcePath != "" { - if filepath.IsAbs(tmpl.SourcePath) { - if !allowAbs { - return nil, fmt.Errorf("Specifying absolute template paths disallowed by client config: %q", tmpl.SourcePath) - } - - src = tmpl.SourcePath - } else { - src = filepath.Join(config.TaskDir, taskEnv.ReplaceEnv(tmpl.SourcePath)) - } - } - if tmpl.DestPath != "" { - dest = filepath.Join(config.TaskDir, taskEnv.ReplaceEnv(tmpl.DestPath)) - } - - ct := ctconf.DefaultTemplateConfig() - ct.Source = &src - ct.Destination = &dest - ct.Contents = &tmpl.EmbeddedTmpl - ct.LeftDelim = &tmpl.LeftDelim - ct.RightDelim = &tmpl.RightDelim - - // Set the permissions - if tmpl.Perms != "" { - v, err := strconv.ParseUint(tmpl.Perms, 8, 12) - if err != nil { - return nil, fmt.Errorf("Failed to parse %q as octal: %v", tmpl.Perms, err) - } - m := os.FileMode(v) - ct.Perms = &m - } - ct.Finalize() - - ctmpls[*ct] = tmpl - } - - return ctmpls, nil -} - -// newRunnerConfig returns a consul-template runner configuration, setting the -// Vault and Consul configurations based on the clients configs. -func newRunnerConfig(config *TaskTemplateManagerConfig, - templateMapping map[ctconf.TemplateConfig]*structs.Template) (*ctconf.Config, error) { - - cc := config.ClientConfig - conf := ctconf.DefaultConfig() - - // Gather the consul-template templates - flat := ctconf.TemplateConfigs(make([]*ctconf.TemplateConfig, 0, len(templateMapping))) - for ctmpl := range templateMapping { - local := ctmpl - flat = append(flat, &local) - } - conf.Templates = &flat - - // Go through the templates and determine the minimum Vault grace - vaultGrace := time.Duration(-1) - for _, tmpl := range templateMapping { - // Initial condition - if vaultGrace < 0 { - vaultGrace = tmpl.VaultGrace - } else if tmpl.VaultGrace < vaultGrace { - vaultGrace = tmpl.VaultGrace - } - } - - // Force faster retries - if config.retryRate != 0 { - rate := config.retryRate - conf.Consul.Retry.Backoff = &rate - } - - // Setup the Consul config - if cc.ConsulConfig != nil { - conf.Consul.Address = &cc.ConsulConfig.Addr - conf.Consul.Token = &cc.ConsulConfig.Token - - if cc.ConsulConfig.EnableSSL != nil && *cc.ConsulConfig.EnableSSL { - verify := cc.ConsulConfig.VerifySSL != nil && *cc.ConsulConfig.VerifySSL - conf.Consul.SSL = &ctconf.SSLConfig{ - Enabled: helper.BoolToPtr(true), - Verify: &verify, - Cert: &cc.ConsulConfig.CertFile, - Key: &cc.ConsulConfig.KeyFile, - CaCert: &cc.ConsulConfig.CAFile, - } - } - - if cc.ConsulConfig.Auth != "" { - parts := strings.SplitN(cc.ConsulConfig.Auth, ":", 2) - if len(parts) != 2 { - return nil, fmt.Errorf("Failed to parse Consul Auth config") - } - - conf.Consul.Auth = &ctconf.AuthConfig{ - Enabled: helper.BoolToPtr(true), - Username: &parts[0], - Password: &parts[1], - } - } - } - - // Setup the Vault config - // Always set these to ensure nothing is picked up from the environment - emptyStr := "" - conf.Vault.RenewToken = helper.BoolToPtr(false) - conf.Vault.Token = &emptyStr - if cc.VaultConfig != nil && cc.VaultConfig.IsEnabled() { - conf.Vault.Address = &cc.VaultConfig.Addr - conf.Vault.Token = &config.VaultToken - conf.Vault.Grace = helper.TimeToPtr(vaultGrace) - - if strings.HasPrefix(cc.VaultConfig.Addr, "https") || cc.VaultConfig.TLSCertFile != "" { - skipVerify := cc.VaultConfig.TLSSkipVerify != nil && *cc.VaultConfig.TLSSkipVerify - verify := !skipVerify - conf.Vault.SSL = &ctconf.SSLConfig{ - Enabled: helper.BoolToPtr(true), - Verify: &verify, - Cert: &cc.VaultConfig.TLSCertFile, - Key: &cc.VaultConfig.TLSKeyFile, - CaCert: &cc.VaultConfig.TLSCaFile, - CaPath: &cc.VaultConfig.TLSCaPath, - ServerName: &cc.VaultConfig.TLSServerName, - } - } else { - conf.Vault.SSL = &ctconf.SSLConfig{ - Enabled: helper.BoolToPtr(false), - Verify: helper.BoolToPtr(false), - Cert: &emptyStr, - Key: &emptyStr, - CaCert: &emptyStr, - CaPath: &emptyStr, - ServerName: &emptyStr, - } - } - } - - conf.Finalize() - return conf, nil -} - -// loadTemplateEnv loads task environment variables from all templates. -func loadTemplateEnv(tmpls []*structs.Template, taskDir string) (map[string]string, error) { - all := make(map[string]string, 50) - for _, t := range tmpls { - if !t.Envvars { - continue - } - f, err := os.Open(filepath.Join(taskDir, t.DestPath)) - if err != nil { - return nil, fmt.Errorf("error opening env template: %v", err) - } - defer f.Close() - - // Parse environment fil - vars, err := envparse.Parse(f) - if err != nil { - return nil, fmt.Errorf("error parsing env template %q: %v", t.DestPath, err) - } - for k, v := range vars { - all[k] = v - } - } - return all, nil -} diff --git a/client/allocrunnerdeprecated/taskrunner/consul_template_test.go b/client/allocrunnerdeprecated/taskrunner/consul_template_test.go deleted file mode 100644 index 6e35d5af9255..000000000000 --- a/client/allocrunnerdeprecated/taskrunner/consul_template_test.go +++ /dev/null @@ -1,1323 +0,0 @@ -// +build deprecated - -package taskrunner - -import ( - "fmt" - "io" - "io/ioutil" - "os" - "path/filepath" - "strings" - "testing" - "time" - - ctestutil "github.com/hashicorp/consul/testutil" - "github.com/hashicorp/nomad/client/config" - "github.com/hashicorp/nomad/client/taskenv" - "github.com/hashicorp/nomad/helper" - "github.com/hashicorp/nomad/nomad/mock" - "github.com/hashicorp/nomad/nomad/structs" - sconfig "github.com/hashicorp/nomad/nomad/structs/config" - "github.com/hashicorp/nomad/testutil" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -const ( - // TestTaskName is the name of the injected task. It should appear in the - // environment variable $NOMAD_TASK_NAME - TestTaskName = "test-task" -) - -// MockTaskHooks is a mock of the TaskHooks interface useful for testing -type MockTaskHooks struct { - Restarts int - RestartCh chan struct{} - - Signals []os.Signal - SignalCh chan struct{} - - // SignalError is returned when Signal is called on the mock hook - SignalError error - - UnblockCh chan struct{} - Unblocked bool - - KillReason string - KillCh chan struct{} - - Events []string - EmitEventCh chan struct{} -} - -func NewMockTaskHooks() *MockTaskHooks { - return &MockTaskHooks{ - UnblockCh: make(chan struct{}, 1), - RestartCh: make(chan struct{}, 1), - SignalCh: make(chan struct{}, 1), - KillCh: make(chan struct{}, 1), - EmitEventCh: make(chan struct{}, 1), - } -} -func (m *MockTaskHooks) Restart(source, reason string, failure bool) { - m.Restarts++ - select { - case m.RestartCh <- struct{}{}: - default: - } -} - -func (m *MockTaskHooks) Signal(source, reason string, s os.Signal) error { - m.Signals = append(m.Signals, s) - select { - case m.SignalCh <- struct{}{}: - default: - } - - return m.SignalError -} - -func (m *MockTaskHooks) Kill(source, reason string, fail bool) { - m.KillReason = reason - select { - case m.KillCh <- struct{}{}: - default: - } -} - -func (m *MockTaskHooks) UnblockStart(source string) { - if !m.Unblocked { - close(m.UnblockCh) - } - - m.Unblocked = true -} - -func (m *MockTaskHooks) EmitEvent(source, message string) { - m.Events = append(m.Events, message) - select { - case m.EmitEventCh <- struct{}{}: - default: - } -} - -// testHarness is used to test the TaskTemplateManager by spinning up -// Consul/Vault as needed -type testHarness struct { - manager *TaskTemplateManager - mockHooks *MockTaskHooks - templates []*structs.Template - envBuilder *taskenv.Builder - node *structs.Node - config *config.Config - vaultToken string - taskDir string - vault *testutil.TestVault - consul *ctestutil.TestServer - emitRate time.Duration -} - -// newTestHarness returns a harness starting a dev consul and vault server, -// building the appropriate config and creating a TaskTemplateManager -func newTestHarness(t *testing.T, templates []*structs.Template, consul, vault bool) *testHarness { - region := "global" - harness := &testHarness{ - mockHooks: NewMockTaskHooks(), - templates: templates, - node: mock.Node(), - config: &config.Config{Region: region}, - emitRate: DefaultMaxTemplateEventRate, - } - - // Build the task environment - a := mock.Alloc() - task := a.Job.TaskGroups[0].Tasks[0] - task.Name = TestTaskName - harness.envBuilder = taskenv.NewBuilder(harness.node, a, task, region) - - // Make a tempdir - d, err := ioutil.TempDir("", "ct_test") - if err != nil { - t.Fatalf("Failed to make tmpdir: %v", err) - } - harness.taskDir = d - - if consul { - harness.consul, err = ctestutil.NewTestServer() - if err != nil { - t.Fatalf("error starting test Consul server: %v", err) - } - harness.config.ConsulConfig = &sconfig.ConsulConfig{ - Addr: harness.consul.HTTPAddr, - } - } - - if vault { - harness.vault = testutil.NewTestVault(t) - harness.config.VaultConfig = harness.vault.Config - harness.vaultToken = harness.vault.RootToken - } - - return harness -} - -func (h *testHarness) start(t *testing.T) { - if err := h.startWithErr(); err != nil { - t.Fatalf("failed to build task template manager: %v", err) - } -} - -func (h *testHarness) startWithErr() error { - var err error - h.manager, err = NewTaskTemplateManager(&TaskTemplateManagerConfig{ - Hooks: h.mockHooks, - Templates: h.templates, - ClientConfig: h.config, - VaultToken: h.vaultToken, - TaskDir: h.taskDir, - EnvBuilder: h.envBuilder, - MaxTemplateEventRate: h.emitRate, - retryRate: 10 * time.Millisecond, - }) - - return err -} - -func (h *testHarness) setEmitRate(d time.Duration) { - h.emitRate = d -} - -// stop is used to stop any running Vault or Consul server plus the task manager -func (h *testHarness) stop() { - if h.vault != nil { - h.vault.Stop() - } - if h.consul != nil { - h.consul.Stop() - } - if h.manager != nil { - h.manager.Stop() - } - if h.taskDir != "" { - os.RemoveAll(h.taskDir) - } -} - -func TestTaskTemplateManager_InvalidConfig(t *testing.T) { - t.Parallel() - hooks := NewMockTaskHooks() - clientConfig := &config.Config{Region: "global"} - taskDir := "foo" - a := mock.Alloc() - envBuilder := taskenv.NewBuilder(mock.Node(), a, a.Job.TaskGroups[0].Tasks[0], clientConfig.Region) - - cases := []struct { - name string - config *TaskTemplateManagerConfig - expectedErr string - }{ - { - name: "nil config", - config: nil, - expectedErr: "Nil config passed", - }, - { - name: "bad hooks", - config: &TaskTemplateManagerConfig{ - ClientConfig: clientConfig, - TaskDir: taskDir, - EnvBuilder: envBuilder, - MaxTemplateEventRate: DefaultMaxTemplateEventRate, - }, - expectedErr: "task hooks", - }, - { - name: "bad client config", - config: &TaskTemplateManagerConfig{ - Hooks: hooks, - TaskDir: taskDir, - EnvBuilder: envBuilder, - MaxTemplateEventRate: DefaultMaxTemplateEventRate, - }, - expectedErr: "client config", - }, - { - name: "bad task dir", - config: &TaskTemplateManagerConfig{ - ClientConfig: clientConfig, - Hooks: hooks, - EnvBuilder: envBuilder, - MaxTemplateEventRate: DefaultMaxTemplateEventRate, - }, - expectedErr: "task directory", - }, - { - name: "bad env builder", - config: &TaskTemplateManagerConfig{ - ClientConfig: clientConfig, - Hooks: hooks, - TaskDir: taskDir, - MaxTemplateEventRate: DefaultMaxTemplateEventRate, - }, - expectedErr: "task environment", - }, - { - name: "bad max event rate", - config: &TaskTemplateManagerConfig{ - ClientConfig: clientConfig, - Hooks: hooks, - TaskDir: taskDir, - EnvBuilder: envBuilder, - }, - expectedErr: "template event rate", - }, - { - name: "valid", - config: &TaskTemplateManagerConfig{ - ClientConfig: clientConfig, - Hooks: hooks, - TaskDir: taskDir, - EnvBuilder: envBuilder, - MaxTemplateEventRate: DefaultMaxTemplateEventRate, - }, - }, - { - name: "invalid signal", - config: &TaskTemplateManagerConfig{ - Templates: []*structs.Template{ - { - DestPath: "foo", - EmbeddedTmpl: "hello, world", - ChangeMode: structs.TemplateChangeModeSignal, - ChangeSignal: "foobarbaz", - }, - }, - ClientConfig: clientConfig, - Hooks: hooks, - TaskDir: taskDir, - EnvBuilder: envBuilder, - MaxTemplateEventRate: DefaultMaxTemplateEventRate, - }, - expectedErr: "parse signal", - }, - } - - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - _, err := NewTaskTemplateManager(c.config) - if err != nil { - if c.expectedErr == "" { - t.Fatalf("unexpected error: %v", err) - } else if !strings.Contains(err.Error(), c.expectedErr) { - t.Fatalf("expected error to contain %q; got %q", c.expectedErr, err.Error()) - } - } else if c.expectedErr != "" { - t.Fatalf("expected an error to contain %q", c.expectedErr) - } - }) - } -} - -func TestTaskTemplateManager_HostPath(t *testing.T) { - t.Parallel() - // Make a template that will render immediately and write it to a tmp file - f, err := ioutil.TempFile("", "") - if err != nil { - t.Fatalf("Bad: %v", err) - } - defer f.Close() - defer os.Remove(f.Name()) - - content := "hello, world!" - if _, err := io.WriteString(f, content); err != nil { - t.Fatalf("Bad: %v", err) - } - - file := "my.tmpl" - template := &structs.Template{ - SourcePath: f.Name(), - DestPath: file, - ChangeMode: structs.TemplateChangeModeNoop, - } - - harness := newTestHarness(t, []*structs.Template{template}, false, false) - harness.start(t) - defer harness.stop() - - // Wait for the unblock - select { - case <-harness.mockHooks.UnblockCh: - case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Task unblock should have been called") - } - - // Check the file is there - path := filepath.Join(harness.taskDir, file) - raw, err := ioutil.ReadFile(path) - if err != nil { - t.Fatalf("Failed to read rendered template from %q: %v", path, err) - } - - if s := string(raw); s != content { - t.Fatalf("Unexpected template data; got %q, want %q", s, content) - } - - // Change the config to disallow host sources - harness = newTestHarness(t, []*structs.Template{template}, false, false) - harness.config.Options = map[string]string{ - hostSrcOption: "false", - } - if err := harness.startWithErr(); err == nil || !strings.Contains(err.Error(), "absolute") { - t.Fatalf("Expected absolute template path disallowed: %v", err) - } -} - -func TestTaskTemplateManager_Unblock_Static(t *testing.T) { - t.Parallel() - // Make a template that will render immediately - content := "hello, world!" - file := "my.tmpl" - template := &structs.Template{ - EmbeddedTmpl: content, - DestPath: file, - ChangeMode: structs.TemplateChangeModeNoop, - } - - harness := newTestHarness(t, []*structs.Template{template}, false, false) - harness.start(t) - defer harness.stop() - - // Wait for the unblock - select { - case <-harness.mockHooks.UnblockCh: - case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Task unblock should have been called") - } - - // Check the file is there - path := filepath.Join(harness.taskDir, file) - raw, err := ioutil.ReadFile(path) - if err != nil { - t.Fatalf("Failed to read rendered template from %q: %v", path, err) - } - - if s := string(raw); s != content { - t.Fatalf("Unexpected template data; got %q, want %q", s, content) - } -} - -func TestTaskTemplateManager_Permissions(t *testing.T) { - t.Parallel() - // Make a template that will render immediately - content := "hello, world!" - file := "my.tmpl" - template := &structs.Template{ - EmbeddedTmpl: content, - DestPath: file, - ChangeMode: structs.TemplateChangeModeNoop, - Perms: "777", - } - - harness := newTestHarness(t, []*structs.Template{template}, false, false) - harness.start(t) - defer harness.stop() - - // Wait for the unblock - select { - case <-harness.mockHooks.UnblockCh: - case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Task unblock should have been called") - } - - // Check the file is there - path := filepath.Join(harness.taskDir, file) - fi, err := os.Stat(path) - if err != nil { - t.Fatalf("Failed to stat file: %v", err) - } - - if m := fi.Mode(); m != os.ModePerm { - t.Fatalf("Got mode %v; want %v", m, os.ModePerm) - } -} - -func TestTaskTemplateManager_Unblock_Static_NomadEnv(t *testing.T) { - t.Parallel() - // Make a template that will render immediately - content := `Hello Nomad Task: {{env "NOMAD_TASK_NAME"}}` - expected := fmt.Sprintf("Hello Nomad Task: %s", TestTaskName) - file := "my.tmpl" - template := &structs.Template{ - EmbeddedTmpl: content, - DestPath: file, - ChangeMode: structs.TemplateChangeModeNoop, - } - - harness := newTestHarness(t, []*structs.Template{template}, false, false) - harness.start(t) - defer harness.stop() - - // Wait for the unblock - select { - case <-harness.mockHooks.UnblockCh: - case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Task unblock should have been called") - } - - // Check the file is there - path := filepath.Join(harness.taskDir, file) - raw, err := ioutil.ReadFile(path) - if err != nil { - t.Fatalf("Failed to read rendered template from %q: %v", path, err) - } - - if s := string(raw); s != expected { - t.Fatalf("Unexpected template data; got %q, want %q", s, expected) - } -} - -func TestTaskTemplateManager_Unblock_Static_AlreadyRendered(t *testing.T) { - t.Parallel() - // Make a template that will render immediately - content := "hello, world!" - file := "my.tmpl" - template := &structs.Template{ - EmbeddedTmpl: content, - DestPath: file, - ChangeMode: structs.TemplateChangeModeNoop, - } - - harness := newTestHarness(t, []*structs.Template{template}, false, false) - - // Write the contents - path := filepath.Join(harness.taskDir, file) - if err := ioutil.WriteFile(path, []byte(content), 0777); err != nil { - t.Fatalf("Failed to write data: %v", err) - } - - harness.start(t) - defer harness.stop() - - // Wait for the unblock - select { - case <-harness.mockHooks.UnblockCh: - case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Task unblock should have been called") - } - - // Check the file is there - path = filepath.Join(harness.taskDir, file) - raw, err := ioutil.ReadFile(path) - if err != nil { - t.Fatalf("Failed to read rendered template from %q: %v", path, err) - } - - if s := string(raw); s != content { - t.Fatalf("Unexpected template data; got %q, want %q", s, content) - } -} - -func TestTaskTemplateManager_Unblock_Consul(t *testing.T) { - t.Parallel() - // Make a template that will render based on a key in Consul - key := "foo" - content := "barbaz" - embedded := fmt.Sprintf(`{{key "%s"}}`, key) - file := "my.tmpl" - template := &structs.Template{ - EmbeddedTmpl: embedded, - DestPath: file, - ChangeMode: structs.TemplateChangeModeNoop, - } - - harness := newTestHarness(t, []*structs.Template{template}, true, false) - harness.start(t) - defer harness.stop() - - // Ensure no unblock - select { - case <-harness.mockHooks.UnblockCh: - t.Fatalf("Task unblock should have not have been called") - case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): - } - - // Write the key to Consul - harness.consul.SetKV(t, key, []byte(content)) - - // Wait for the unblock - select { - case <-harness.mockHooks.UnblockCh: - case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Task unblock should have been called") - } - - // Check the file is there - path := filepath.Join(harness.taskDir, file) - raw, err := ioutil.ReadFile(path) - if err != nil { - t.Fatalf("Failed to read rendered template from %q: %v", path, err) - } - - if s := string(raw); s != content { - t.Fatalf("Unexpected template data; got %q, want %q", s, content) - } -} - -func TestTaskTemplateManager_Unblock_Vault(t *testing.T) { - t.Parallel() - require := require.New(t) - // Make a template that will render based on a key in Vault - vaultPath := "secret/data/password" - key := "password" - content := "barbaz" - embedded := fmt.Sprintf(`{{with secret "%s"}}{{.Data.data.%s}}{{end}}`, vaultPath, key) - file := "my.tmpl" - template := &structs.Template{ - EmbeddedTmpl: embedded, - DestPath: file, - ChangeMode: structs.TemplateChangeModeNoop, - } - - harness := newTestHarness(t, []*structs.Template{template}, false, true) - harness.start(t) - defer harness.stop() - - // Ensure no unblock - select { - case <-harness.mockHooks.UnblockCh: - t.Fatalf("Task unblock should not have been called") - case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): - } - - // Write the secret to Vault - logical := harness.vault.Client.Logical() - _, err := logical.Write(vaultPath, map[string]interface{}{"data": map[string]interface{}{key: content}}) - require.NoError(err) - - // Wait for the unblock - select { - case <-harness.mockHooks.UnblockCh: - case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Task unblock should have been called") - } - - // Check the file is there - path := filepath.Join(harness.taskDir, file) - raw, err := ioutil.ReadFile(path) - if err != nil { - t.Fatalf("Failed to read rendered template from %q: %v", path, err) - } - - if s := string(raw); s != content { - t.Fatalf("Unexpected template data; got %q, want %q", s, content) - } -} - -func TestTaskTemplateManager_Unblock_Multi_Template(t *testing.T) { - t.Parallel() - // Make a template that will render immediately - staticContent := "hello, world!" - staticFile := "my.tmpl" - template := &structs.Template{ - EmbeddedTmpl: staticContent, - DestPath: staticFile, - ChangeMode: structs.TemplateChangeModeNoop, - } - - // Make a template that will render based on a key in Consul - consulKey := "foo" - consulContent := "barbaz" - consulEmbedded := fmt.Sprintf(`{{key "%s"}}`, consulKey) - consulFile := "consul.tmpl" - template2 := &structs.Template{ - EmbeddedTmpl: consulEmbedded, - DestPath: consulFile, - ChangeMode: structs.TemplateChangeModeNoop, - } - - harness := newTestHarness(t, []*structs.Template{template, template2}, true, false) - harness.start(t) - defer harness.stop() - - // Ensure no unblock - select { - case <-harness.mockHooks.UnblockCh: - t.Fatalf("Task unblock should have not have been called") - case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): - } - - // Check that the static file has been rendered - path := filepath.Join(harness.taskDir, staticFile) - raw, err := ioutil.ReadFile(path) - if err != nil { - t.Fatalf("Failed to read rendered template from %q: %v", path, err) - } - - if s := string(raw); s != staticContent { - t.Fatalf("Unexpected template data; got %q, want %q", s, staticContent) - } - - // Write the key to Consul - harness.consul.SetKV(t, consulKey, []byte(consulContent)) - - // Wait for the unblock - select { - case <-harness.mockHooks.UnblockCh: - case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Task unblock should have been called") - } - - // Check the consul file is there - path = filepath.Join(harness.taskDir, consulFile) - raw, err = ioutil.ReadFile(path) - if err != nil { - t.Fatalf("Failed to read rendered template from %q: %v", path, err) - } - - if s := string(raw); s != consulContent { - t.Fatalf("Unexpected template data; got %q, want %q", s, consulContent) - } -} - -func TestTaskTemplateManager_Rerender_Noop(t *testing.T) { - t.Parallel() - // Make a template that will render based on a key in Consul - key := "foo" - content1 := "bar" - content2 := "baz" - embedded := fmt.Sprintf(`{{key "%s"}}`, key) - file := "my.tmpl" - template := &structs.Template{ - EmbeddedTmpl: embedded, - DestPath: file, - ChangeMode: structs.TemplateChangeModeNoop, - } - - harness := newTestHarness(t, []*structs.Template{template}, true, false) - harness.start(t) - defer harness.stop() - - // Ensure no unblock - select { - case <-harness.mockHooks.UnblockCh: - t.Fatalf("Task unblock should have not have been called") - case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): - } - - // Write the key to Consul - harness.consul.SetKV(t, key, []byte(content1)) - - // Wait for the unblock - select { - case <-harness.mockHooks.UnblockCh: - case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Task unblock should have been called") - } - - // Check the file is there - path := filepath.Join(harness.taskDir, file) - raw, err := ioutil.ReadFile(path) - if err != nil { - t.Fatalf("Failed to read rendered template from %q: %v", path, err) - } - - if s := string(raw); s != content1 { - t.Fatalf("Unexpected template data; got %q, want %q", s, content1) - } - - // Update the key in Consul - harness.consul.SetKV(t, key, []byte(content2)) - - select { - case <-harness.mockHooks.RestartCh: - t.Fatalf("Noop ignored: %+v", harness.mockHooks) - case <-harness.mockHooks.SignalCh: - t.Fatalf("Noop ignored: %+v", harness.mockHooks) - case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): - } - - // Check the file has been updated - path = filepath.Join(harness.taskDir, file) - raw, err = ioutil.ReadFile(path) - if err != nil { - t.Fatalf("Failed to read rendered template from %q: %v", path, err) - } - - if s := string(raw); s != content2 { - t.Fatalf("Unexpected template data; got %q, want %q", s, content2) - } -} - -func TestTaskTemplateManager_Rerender_Signal(t *testing.T) { - t.Parallel() - // Make a template that renders based on a key in Consul and sends SIGALRM - key1 := "foo" - content1_1 := "bar" - content1_2 := "baz" - embedded1 := fmt.Sprintf(`{{key "%s"}}`, key1) - file1 := "my.tmpl" - template := &structs.Template{ - EmbeddedTmpl: embedded1, - DestPath: file1, - ChangeMode: structs.TemplateChangeModeSignal, - ChangeSignal: "SIGALRM", - } - - // Make a template that renders based on a key in Consul and sends SIGBUS - key2 := "bam" - content2_1 := "cat" - content2_2 := "dog" - embedded2 := fmt.Sprintf(`{{key "%s"}}`, key2) - file2 := "my-second.tmpl" - template2 := &structs.Template{ - EmbeddedTmpl: embedded2, - DestPath: file2, - ChangeMode: structs.TemplateChangeModeSignal, - ChangeSignal: "SIGBUS", - } - - harness := newTestHarness(t, []*structs.Template{template, template2}, true, false) - harness.start(t) - defer harness.stop() - - // Ensure no unblock - select { - case <-harness.mockHooks.UnblockCh: - t.Fatalf("Task unblock should have not have been called") - case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): - } - - // Write the key to Consul - harness.consul.SetKV(t, key1, []byte(content1_1)) - harness.consul.SetKV(t, key2, []byte(content2_1)) - - // Wait for the unblock - select { - case <-harness.mockHooks.UnblockCh: - case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Task unblock should have been called") - } - - if len(harness.mockHooks.Signals) != 0 { - t.Fatalf("Should not have received any signals: %+v", harness.mockHooks) - } - - // Update the keys in Consul - harness.consul.SetKV(t, key1, []byte(content1_2)) - harness.consul.SetKV(t, key2, []byte(content2_2)) - - // Wait for signals - timeout := time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second) -OUTER: - for { - select { - case <-harness.mockHooks.RestartCh: - t.Fatalf("Restart with signal policy: %+v", harness.mockHooks) - case <-harness.mockHooks.SignalCh: - if len(harness.mockHooks.Signals) != 2 { - continue - } - break OUTER - case <-timeout: - t.Fatalf("Should have received two signals: %+v", harness.mockHooks) - } - } - - // Check the files have been updated - path := filepath.Join(harness.taskDir, file1) - raw, err := ioutil.ReadFile(path) - if err != nil { - t.Fatalf("Failed to read rendered template from %q: %v", path, err) - } - - if s := string(raw); s != content1_2 { - t.Fatalf("Unexpected template data; got %q, want %q", s, content1_2) - } - - path = filepath.Join(harness.taskDir, file2) - raw, err = ioutil.ReadFile(path) - if err != nil { - t.Fatalf("Failed to read rendered template from %q: %v", path, err) - } - - if s := string(raw); s != content2_2 { - t.Fatalf("Unexpected template data; got %q, want %q", s, content2_2) - } -} - -func TestTaskTemplateManager_Rerender_Restart(t *testing.T) { - t.Parallel() - // Make a template that renders based on a key in Consul and sends restart - key1 := "bam" - content1_1 := "cat" - content1_2 := "dog" - embedded1 := fmt.Sprintf(`{{key "%s"}}`, key1) - file1 := "my.tmpl" - template := &structs.Template{ - EmbeddedTmpl: embedded1, - DestPath: file1, - ChangeMode: structs.TemplateChangeModeRestart, - } - - harness := newTestHarness(t, []*structs.Template{template}, true, false) - harness.start(t) - defer harness.stop() - - // Ensure no unblock - select { - case <-harness.mockHooks.UnblockCh: - t.Fatalf("Task unblock should have not have been called") - case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): - } - - // Write the key to Consul - harness.consul.SetKV(t, key1, []byte(content1_1)) - - // Wait for the unblock - select { - case <-harness.mockHooks.UnblockCh: - case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Task unblock should have been called") - } - - // Update the keys in Consul - harness.consul.SetKV(t, key1, []byte(content1_2)) - - // Wait for restart - timeout := time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second) -OUTER: - for { - select { - case <-harness.mockHooks.RestartCh: - break OUTER - case <-harness.mockHooks.SignalCh: - t.Fatalf("Signal with restart policy: %+v", harness.mockHooks) - case <-timeout: - t.Fatalf("Should have received a restart: %+v", harness.mockHooks) - } - } - - // Check the files have been updated - path := filepath.Join(harness.taskDir, file1) - raw, err := ioutil.ReadFile(path) - if err != nil { - t.Fatalf("Failed to read rendered template from %q: %v", path, err) - } - - if s := string(raw); s != content1_2 { - t.Fatalf("Unexpected template data; got %q, want %q", s, content1_2) - } -} - -func TestTaskTemplateManager_Interpolate_Destination(t *testing.T) { - t.Parallel() - // Make a template that will have its destination interpolated - content := "hello, world!" - file := "${node.unique.id}.tmpl" - template := &structs.Template{ - EmbeddedTmpl: content, - DestPath: file, - ChangeMode: structs.TemplateChangeModeNoop, - } - - harness := newTestHarness(t, []*structs.Template{template}, false, false) - harness.start(t) - defer harness.stop() - - // Ensure unblock - select { - case <-harness.mockHooks.UnblockCh: - case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Task unblock should have been called") - } - - // Check the file is there - actual := fmt.Sprintf("%s.tmpl", harness.node.ID) - path := filepath.Join(harness.taskDir, actual) - raw, err := ioutil.ReadFile(path) - if err != nil { - t.Fatalf("Failed to read rendered template from %q: %v", path, err) - } - - if s := string(raw); s != content { - t.Fatalf("Unexpected template data; got %q, want %q", s, content) - } -} - -func TestTaskTemplateManager_Signal_Error(t *testing.T) { - t.Parallel() - // Make a template that renders based on a key in Consul and sends SIGALRM - key1 := "foo" - content1 := "bar" - content2 := "baz" - embedded1 := fmt.Sprintf(`{{key "%s"}}`, key1) - file1 := "my.tmpl" - template := &structs.Template{ - EmbeddedTmpl: embedded1, - DestPath: file1, - ChangeMode: structs.TemplateChangeModeSignal, - ChangeSignal: "SIGALRM", - } - - harness := newTestHarness(t, []*structs.Template{template}, true, false) - harness.start(t) - defer harness.stop() - - harness.mockHooks.SignalError = fmt.Errorf("test error") - - // Write the key to Consul - harness.consul.SetKV(t, key1, []byte(content1)) - - // Wait a little - select { - case <-harness.mockHooks.UnblockCh: - case <-time.After(time.Duration(2*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Should have received unblock: %+v", harness.mockHooks) - } - - // Write the key to Consul - harness.consul.SetKV(t, key1, []byte(content2)) - - // Wait for kill channel - select { - case <-harness.mockHooks.KillCh: - break - case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Should have received a signals: %+v", harness.mockHooks) - } - - if !strings.Contains(harness.mockHooks.KillReason, "Sending signals") { - t.Fatalf("Unexpected error: %v", harness.mockHooks.KillReason) - } -} - -// TestTaskTemplateManager_Env asserts templates with the env flag set are read -// into the task's environment. -func TestTaskTemplateManager_Env(t *testing.T) { - t.Parallel() - template := &structs.Template{ - EmbeddedTmpl: ` -# Comment lines are ok - -FOO=bar -foo=123 -ANYTHING_goes=Spaces are=ok! -`, - DestPath: "test.env", - ChangeMode: structs.TemplateChangeModeNoop, - Envvars: true, - } - harness := newTestHarness(t, []*structs.Template{template}, true, false) - harness.start(t) - defer harness.stop() - - // Wait a little - select { - case <-harness.mockHooks.UnblockCh: - case <-time.After(time.Duration(2*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Should have received unblock: %+v", harness.mockHooks) - } - - // Validate environment - env := harness.envBuilder.Build().Map() - if len(env) < 3 { - t.Fatalf("expected at least 3 env vars but found %d:\n%#v\n", len(env), env) - } - if env["FOO"] != "bar" { - t.Errorf("expected FOO=bar but found %q", env["FOO"]) - } - if env["foo"] != "123" { - t.Errorf("expected foo=123 but found %q", env["foo"]) - } - if env["ANYTHING_goes"] != "Spaces are=ok!" { - t.Errorf("expected ANYTHING_GOES='Spaces are ok!' but found %q", env["ANYTHING_goes"]) - } -} - -// TestTaskTemplateManager_Env_Missing asserts the core env -// template processing function returns errors when files don't exist -func TestTaskTemplateManager_Env_Missing(t *testing.T) { - t.Parallel() - d, err := ioutil.TempDir("", "ct_env_missing") - if err != nil { - t.Fatalf("err: %v", err) - } - defer os.RemoveAll(d) - - // Fake writing the file so we don't have to run the whole template manager - err = ioutil.WriteFile(filepath.Join(d, "exists.env"), []byte("FOO=bar\n"), 0644) - if err != nil { - t.Fatalf("error writing template file: %v", err) - } - - templates := []*structs.Template{ - { - EmbeddedTmpl: "FOO=bar\n", - DestPath: "exists.env", - Envvars: true, - }, - { - EmbeddedTmpl: "WHAT=ever\n", - DestPath: "missing.env", - Envvars: true, - }, - } - - if vars, err := loadTemplateEnv(templates, d); err == nil { - t.Fatalf("expected an error but instead got env vars: %#v", vars) - } -} - -// TestTaskTemplateManager_Env_Multi asserts the core env -// template processing function returns combined env vars from multiple -// templates correctly. -func TestTaskTemplateManager_Env_Multi(t *testing.T) { - t.Parallel() - d, err := ioutil.TempDir("", "ct_env_missing") - if err != nil { - t.Fatalf("err: %v", err) - } - defer os.RemoveAll(d) - - // Fake writing the files so we don't have to run the whole template manager - err = ioutil.WriteFile(filepath.Join(d, "zzz.env"), []byte("FOO=bar\nSHARED=nope\n"), 0644) - if err != nil { - t.Fatalf("error writing template file 1: %v", err) - } - err = ioutil.WriteFile(filepath.Join(d, "aaa.env"), []byte("BAR=foo\nSHARED=yup\n"), 0644) - if err != nil { - t.Fatalf("error writing template file 2: %v", err) - } - - // Templates will get loaded in order (not alpha sorted) - templates := []*structs.Template{ - { - DestPath: "zzz.env", - Envvars: true, - }, - { - DestPath: "aaa.env", - Envvars: true, - }, - } - - vars, err := loadTemplateEnv(templates, d) - if err != nil { - t.Fatalf("expected an error but instead got env vars: %#v", vars) - } - if vars["FOO"] != "bar" { - t.Errorf("expected FOO=bar but found %q", vars["FOO"]) - } - if vars["BAR"] != "foo" { - t.Errorf("expected BAR=foo but found %q", vars["BAR"]) - } - if vars["SHARED"] != "yup" { - t.Errorf("expected FOO=bar but found %q", vars["yup"]) - } -} - -func TestTaskTemplateManager_Rerender_Env(t *testing.T) { - t.Parallel() - // Make a template that renders based on a key in Consul and sends restart - key1 := "bam" - key2 := "bar" - content1_1 := "cat" - content1_2 := "dog" - t1 := &structs.Template{ - EmbeddedTmpl: ` -FOO={{key "bam"}} -`, - DestPath: "test.env", - ChangeMode: structs.TemplateChangeModeRestart, - Envvars: true, - } - t2 := &structs.Template{ - EmbeddedTmpl: ` -BAR={{key "bar"}} -`, - DestPath: "test2.env", - ChangeMode: structs.TemplateChangeModeRestart, - Envvars: true, - } - - harness := newTestHarness(t, []*structs.Template{t1, t2}, true, false) - harness.start(t) - defer harness.stop() - - // Ensure no unblock - select { - case <-harness.mockHooks.UnblockCh: - t.Fatalf("Task unblock should have not have been called") - case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): - } - - // Write the key to Consul - harness.consul.SetKV(t, key1, []byte(content1_1)) - harness.consul.SetKV(t, key2, []byte(content1_1)) - - // Wait for the unblock - select { - case <-harness.mockHooks.UnblockCh: - case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): - t.Fatalf("Task unblock should have been called") - } - - env := harness.envBuilder.Build().Map() - if v, ok := env["FOO"]; !ok || v != content1_1 { - t.Fatalf("Bad env for FOO: %v %v", v, ok) - } - if v, ok := env["BAR"]; !ok || v != content1_1 { - t.Fatalf("Bad env for BAR: %v %v", v, ok) - } - - // Update the keys in Consul - harness.consul.SetKV(t, key1, []byte(content1_2)) - - // Wait for restart - timeout := time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second) -OUTER: - for { - select { - case <-harness.mockHooks.RestartCh: - break OUTER - case <-harness.mockHooks.SignalCh: - t.Fatalf("Signal with restart policy: %+v", harness.mockHooks) - case <-timeout: - t.Fatalf("Should have received a restart: %+v", harness.mockHooks) - } - } - - env = harness.envBuilder.Build().Map() - if v, ok := env["FOO"]; !ok || v != content1_2 { - t.Fatalf("Bad env for FOO: %v %v", v, ok) - } - if v, ok := env["BAR"]; !ok || v != content1_1 { - t.Fatalf("Bad env for BAR: %v %v", v, ok) - } -} - -// TestTaskTemplateManager_Config_ServerName asserts the tls_server_name -// setting is propagated to consul-template's configuration. See #2776 -func TestTaskTemplateManager_Config_ServerName(t *testing.T) { - t.Parallel() - c := config.DefaultConfig() - c.VaultConfig = &sconfig.VaultConfig{ - Enabled: helper.BoolToPtr(true), - Addr: "https://localhost/", - TLSServerName: "notlocalhost", - } - config := &TaskTemplateManagerConfig{ - ClientConfig: c, - VaultToken: "token", - } - ctconf, err := newRunnerConfig(config, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if *ctconf.Vault.SSL.ServerName != c.VaultConfig.TLSServerName { - t.Fatalf("expected %q but found %q", c.VaultConfig.TLSServerName, *ctconf.Vault.SSL.ServerName) - } -} - -// TestTaskTemplateManager_Config_VaultGrace asserts the vault_grace setting is -// propagated to consul-template's configuration. -func TestTaskTemplateManager_Config_VaultGrace(t *testing.T) { - t.Parallel() - assert := assert.New(t) - c := config.DefaultConfig() - c.Node = mock.Node() - c.VaultConfig = &sconfig.VaultConfig{ - Enabled: helper.BoolToPtr(true), - Addr: "https://localhost/", - TLSServerName: "notlocalhost", - } - - alloc := mock.Alloc() - config := &TaskTemplateManagerConfig{ - ClientConfig: c, - VaultToken: "token", - - // Make a template that will render immediately - Templates: []*structs.Template{ - { - EmbeddedTmpl: "bar", - DestPath: "foo", - ChangeMode: structs.TemplateChangeModeNoop, - VaultGrace: 10 * time.Second, - }, - { - EmbeddedTmpl: "baz", - DestPath: "bam", - ChangeMode: structs.TemplateChangeModeNoop, - VaultGrace: 100 * time.Second, - }, - }, - EnvBuilder: taskenv.NewBuilder(c.Node, alloc, alloc.Job.TaskGroups[0].Tasks[0], c.Region), - } - - ctmplMapping, err := parseTemplateConfigs(config) - assert.Nil(err, "Parsing Templates") - - ctconf, err := newRunnerConfig(config, ctmplMapping) - assert.Nil(err, "Building Runner Config") - assert.NotNil(ctconf.Vault.Grace, "Vault Grace Pointer") - assert.Equal(10*time.Second, *ctconf.Vault.Grace, "Vault Grace Value") -} - -func TestTaskTemplateManager_BlockedEvents(t *testing.T) { - t.Parallel() - // Make a template that will render based on a key in Consul - var embedded string - for i := 0; i < 5; i++ { - embedded += fmt.Sprintf(`{{key "%d"}}`, i) - } - - file := "my.tmpl" - template := &structs.Template{ - EmbeddedTmpl: embedded, - DestPath: file, - ChangeMode: structs.TemplateChangeModeNoop, - } - - harness := newTestHarness(t, []*structs.Template{template}, true, false) - harness.setEmitRate(100 * time.Millisecond) - harness.start(t) - defer harness.stop() - - // Ensure that we get a blocked event - select { - case <-harness.mockHooks.UnblockCh: - t.Fatalf("Task unblock should have not have been called") - case <-harness.mockHooks.EmitEventCh: - case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): - t.Fatalf("timeout") - } - - // Check to see we got a correct message - event := harness.mockHooks.Events[0] - if !strings.Contains(event, "and 2 more") { - t.Fatalf("bad event: %q", event) - } - - // Write 3 keys to Consul - for i := 0; i < 3; i++ { - harness.consul.SetKV(t, fmt.Sprintf("%d", i), []byte{0xa}) - } - - // Ensure that we get a blocked event - select { - case <-harness.mockHooks.UnblockCh: - t.Fatalf("Task unblock should have not have been called") - case <-harness.mockHooks.EmitEventCh: - case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): - t.Fatalf("timeout") - } - - // Check to see we got a correct message - event = harness.mockHooks.Events[len(harness.mockHooks.Events)-1] - if !strings.Contains(event, "Missing") || strings.Contains(event, "more") { - t.Fatalf("bad event: %q", event) - } -} diff --git a/client/allocrunnerdeprecated/taskrunner/getters.go b/client/allocrunnerdeprecated/taskrunner/getters.go deleted file mode 100644 index 6fb75bc014a1..000000000000 --- a/client/allocrunnerdeprecated/taskrunner/getters.go +++ /dev/null @@ -1,21 +0,0 @@ -// +build deprecated - -package taskrunner - -// Name returns the name of the task -func (r *TaskRunner) Name() string { - if r == nil || r.task == nil { - return "" - } - - return r.task.Name -} - -// IsLeader returns whether the task is a leader task -func (r *TaskRunner) IsLeader() bool { - if r == nil || r.task == nil { - return false - } - - return r.task.Leader -} diff --git a/client/allocrunnerdeprecated/taskrunner/task_runner.go b/client/allocrunnerdeprecated/taskrunner/task_runner.go deleted file mode 100644 index 8e666683eaa1..000000000000 --- a/client/allocrunnerdeprecated/taskrunner/task_runner.go +++ /dev/null @@ -1,1970 +0,0 @@ -// +build deprecated - -package taskrunner - -import ( - "bytes" - "crypto/md5" - "encoding/hex" - "fmt" - "io" - "io/ioutil" - "log" - "os" - "path/filepath" - "strings" - "sync" - "time" - - metrics "github.com/armon/go-metrics" - "github.com/boltdb/bolt" - "github.com/golang/snappy" - "github.com/hashicorp/consul-template/signals" - "github.com/hashicorp/go-multierror" - version "github.com/hashicorp/go-version" - "github.com/hashicorp/nomad/client/allocdir" - "github.com/hashicorp/nomad/client/allocrunner/taskrunner/getter" - "github.com/hashicorp/nomad/client/allocrunner/taskrunner/restarts" - "github.com/hashicorp/nomad/client/config" - consulApi "github.com/hashicorp/nomad/client/consul" - "github.com/hashicorp/nomad/client/driver" - "github.com/hashicorp/nomad/client/vaultclient" - "github.com/hashicorp/nomad/nomad/structs" - "github.com/ugorji/go/codec" - - dstructs "github.com/hashicorp/nomad/client/driver/structs" - cstructs "github.com/hashicorp/nomad/client/structs" - "github.com/hashicorp/nomad/client/taskenv" -) - -const ( - // killBackoffBaseline is the baseline time for exponential backoff while - // killing a task. - killBackoffBaseline = 5 * time.Second - - // killBackoffLimit is the limit of the exponential backoff for killing - // the task. - killBackoffLimit = 2 * time.Minute - - // killFailureLimit is how many times we will attempt to kill a task before - // giving up and potentially leaking resources. - killFailureLimit = 5 - - // vaultBackoffBaseline is the baseline time for exponential backoff when - // attempting to retrieve a Vault token - vaultBackoffBaseline = 5 * time.Second - - // vaultBackoffLimit is the limit of the exponential backoff when attempting - // to retrieve a Vault token - vaultBackoffLimit = 3 * time.Minute - - // vaultTokenFile is the name of the file holding the Vault token inside the - // task's secret directory - vaultTokenFile = "vault_token" -) - -var ( -// taskRunnerStateAllKey holds all the task runners state. At the moment -// there is no need to split it -//taskRunnerStateAllKey = []byte("simple-all") -) - -// taskRestartEvent wraps a TaskEvent with additional metadata to control -// restart behavior. -type taskRestartEvent struct { - // taskEvent to report - taskEvent *structs.TaskEvent - - // if false, don't count against restart count - failure bool -} - -func newTaskRestartEvent(reason string, failure bool) *taskRestartEvent { - return &taskRestartEvent{ - taskEvent: structs.NewTaskEvent(structs.TaskRestartSignal).SetRestartReason(reason), - failure: failure, - } -} - -// TaskRunner is used to wrap a task within an allocation and provide the execution context. -type TaskRunner struct { - stateDB *bolt.DB - config *config.Config - updater TaskStateUpdater - logger *log.Logger - restartTracker *restarts.RestartTracker - consul consulApi.ConsulServiceAPI - - // running marks whether the task is running - running bool - runningLock sync.Mutex - - resourceUsage *cstructs.TaskResourceUsage - resourceUsageLock sync.RWMutex - - alloc *structs.Allocation - task *structs.Task - taskDir *allocdir.TaskDir - - // envBuilder is used to build the task's environment - envBuilder *taskenv.Builder - - // driverNet is the network information returned by the driver - driverNet *cstructs.DriverNetwork - driverNetLock sync.Mutex - - // updateCh is used to receive updated versions of the allocation - updateCh chan *structs.Allocation - - // handle is returned when Starting or Opening a driver - handle driver.DriverHandle - handleLock sync.Mutex - - // artifactsDownloaded tracks whether the tasks artifacts have been - // downloaded - // - // Must acquire persistLock when accessing - artifactsDownloaded bool - - // taskDirBuilt tracks whether the task has built its directory. - // - // Must acquire persistLock when accessing - taskDirBuilt bool - - // createdResources are all the resources created by the task driver - // across all attempts to start the task. - // Simple gets and sets should use {get,set}CreatedResources - createdResources *driver.CreatedResources - createdResourcesLock sync.Mutex - - // payloadRendered tracks whether the payload has been rendered to disk - payloadRendered bool - - // vaultFuture is the means to wait for and get a Vault token - vaultFuture *tokenFuture - - // recoveredVaultToken is the token that was recovered through a restore - recoveredVaultToken string - - // vaultClient is used to retrieve and renew any needed Vault token - vaultClient vaultclient.VaultClient - - // templateManager is used to manage any consul-templates this task may have - templateManager *TaskTemplateManager - - // startCh is used to trigger the start of the task - startCh chan struct{} - - // unblockCh is used to unblock the starting of the task - unblockCh chan struct{} - unblocked bool - unblockLock sync.Mutex - - // restartCh is used to restart a task - restartCh chan *taskRestartEvent - - // signalCh is used to send a signal to a task - signalCh chan SignalEvent - - destroy bool - destroyCh chan struct{} - destroyLock sync.Mutex - destroyEvent *structs.TaskEvent - - // waitCh closing marks the run loop as having exited - waitCh chan struct{} - - // persistLock must be acquired when accessing fields stored by - // SaveState. SaveState is called asynchronously to TaskRunner.Run by - // AllocRunner, so all state fields must be synchronized using this - // lock. - persistLock sync.Mutex - - // persistedHash is the hash of the last persisted snapshot. It is used to - // detect if a new snapshot has to be written to disk. - persistedHash []byte - - // baseLabels are used when emitting tagged metrics. All task runner metrics - // will have these tags, and optionally more. - baseLabels []metrics.Label -} - -// taskRunnerState is used to snapshot the state of the task runner -type taskRunnerState struct { - Version string - HandleID string - ArtifactDownloaded bool - TaskDirBuilt bool - PayloadRendered bool - CreatedResources *driver.CreatedResources - DriverNetwork *cstructs.DriverNetwork -} - -func (s *taskRunnerState) Hash() []byte { - h := md5.New() - - io.WriteString(h, s.Version) - io.WriteString(h, s.HandleID) - io.WriteString(h, fmt.Sprintf("%v", s.ArtifactDownloaded)) - io.WriteString(h, fmt.Sprintf("%v", s.TaskDirBuilt)) - io.WriteString(h, fmt.Sprintf("%v", s.PayloadRendered)) - h.Write(s.CreatedResources.Hash()) - h.Write(s.DriverNetwork.Hash()) - - return h.Sum(nil) -} - -// TaskStateUpdater is used to signal that tasks state has changed. If lazySync -// is set the event won't be immediately pushed to the server. -type TaskStateUpdater func(taskName, state string, event *structs.TaskEvent, lazySync bool) - -// SignalEvent is a tuple of the signal and the event generating it -type SignalEvent struct { - // s is the signal to be sent - s os.Signal - - // e is the task event generating the signal - e *structs.TaskEvent - - // result should be used to send back the result of the signal - result chan<- error -} - -// NewTaskRunner is used to create a new task context -func NewTaskRunner(logger *log.Logger, config *config.Config, - stateDB *bolt.DB, updater TaskStateUpdater, taskDir *allocdir.TaskDir, - alloc *structs.Allocation, task *structs.Task, - vaultClient vaultclient.VaultClient, consulClient consulApi.ConsulServiceAPI) *TaskRunner { - - // Merge in the task resources - task.Resources = alloc.TaskResources[task.Name] - - // Build the restart tracker. - tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) - if tg == nil { - logger.Printf("[ERR] client: alloc %q for missing task group %q", alloc.ID, alloc.TaskGroup) - return nil - } - restartTracker := restarts.NewRestartTracker(tg.RestartPolicy, alloc.Job.Type) - - // Initialize the environment builder - envBuilder := taskenv.NewBuilder(config.Node, alloc, task, config.Region) - - tc := &TaskRunner{ - config: config, - stateDB: stateDB, - updater: updater, - logger: logger, - restartTracker: restartTracker, - alloc: alloc, - task: task, - taskDir: taskDir, - envBuilder: envBuilder, - createdResources: driver.NewCreatedResources(), - consul: consulClient, - vaultClient: vaultClient, - vaultFuture: NewTokenFuture().Set(""), - updateCh: make(chan *structs.Allocation, 64), - destroyCh: make(chan struct{}), - waitCh: make(chan struct{}), - startCh: make(chan struct{}, 1), - unblockCh: make(chan struct{}), - restartCh: make(chan *taskRestartEvent), - signalCh: make(chan SignalEvent), - } - - tc.baseLabels = []metrics.Label{ - { - Name: "job", - Value: tc.alloc.Job.Name, - }, - { - Name: "task_group", - Value: tc.alloc.TaskGroup, - }, - { - Name: "alloc_id", - Value: tc.alloc.ID, - }, - { - Name: "task", - Value: tc.task.Name, - }, - } - - if tc.alloc.Job.ParentID != "" { - tc.baseLabels = append(tc.baseLabels, metrics.Label{ - Name: "parent_id", - Value: tc.alloc.Job.ParentID, - }) - if strings.Contains(tc.alloc.Job.Name, "/dispatch-") { - tc.baseLabels = append(tc.baseLabels, metrics.Label{ - Name: "dispatch_id", - Value: strings.Split(tc.alloc.Job.Name, "/dispatch-")[1], - }) - } - if strings.Contains(tc.alloc.Job.Name, "/periodic-") { - tc.baseLabels = append(tc.baseLabels, metrics.Label{ - Name: "periodic_id", - Value: strings.Split(tc.alloc.Job.Name, "/periodic-")[1], - }) - } - return tc - } - - return tc -} - -// MarkReceived marks the task as received. -func (r *TaskRunner) MarkReceived() { - // We lazy sync this since there will be a follow up message almost - // immediately. - r.updater(r.task.Name, structs.TaskStatePending, structs.NewTaskEvent(structs.TaskReceived), true) -} - -// WaitCh returns a channel to wait for termination -func (r *TaskRunner) WaitCh() <-chan struct{} { - return r.waitCh -} - -// getHandle returns the task's handle or nil -func (r *TaskRunner) getHandle() driver.DriverHandle { - r.handleLock.Lock() - h := r.handle - r.handleLock.Unlock() - return h -} - -// pre060StateFilePath returns the path to our state file that would have been -// written pre v0.6.0 -// COMPAT: Remove in 0.7.0 -func (r *TaskRunner) pre060StateFilePath() string { - // Get the MD5 of the task name - hashVal := md5.Sum([]byte(r.task.Name)) - hashHex := hex.EncodeToString(hashVal[:]) - dirName := fmt.Sprintf("task-%s", hashHex) - - // Generate the path - return filepath.Join(r.config.StateDir, "alloc", r.alloc.ID, dirName, "state.json") -} - -// RestoreState is used to restore our state. If a non-empty string is returned -// the task is restarted with the string as the reason. This is useful for -// backwards incompatible upgrades that need to restart tasks with a new -// executor. -func (r *TaskRunner) RestoreState() (string, error) { - var snap taskRunnerState - //XXX Deprecated: see allocrunner - //err := r.stateDB.View(func(tx *bolt.Tx) error { - // bkt, err := state.GetTaskBucket(tx, r.alloc.ID, r.task.Name) - // if err != nil { - // return fmt.Errorf("failed to get task bucket: %v", err) - // } - - // if err := state.GetObject(bkt, taskRunnerStateAllKey, &snap); err != nil { - // return fmt.Errorf("failed to read task runner state: %v", err) - // } - // return nil - //}) - //if err != nil { - // return "", err - //} - - // Restore fields from the snapshot - r.artifactsDownloaded = snap.ArtifactDownloaded - r.taskDirBuilt = snap.TaskDirBuilt - r.payloadRendered = snap.PayloadRendered - r.setCreatedResources(snap.CreatedResources) - r.driverNet = snap.DriverNetwork - - if r.task.Vault != nil { - // Read the token from the secret directory - tokenPath := filepath.Join(r.taskDir.SecretsDir, vaultTokenFile) - data, err := ioutil.ReadFile(tokenPath) - if err != nil { - if !os.IsNotExist(err) { - return "", fmt.Errorf("failed to read token for task %q in alloc %q: %v", r.task.Name, r.alloc.ID, err) - } - - // Token file doesn't exist - } else { - // Store the recovered token - r.recoveredVaultToken = string(data) - } - } - - // Restore the driver - restartReason := "" - if snap.HandleID != "" { - d, err := r.createDriver() - if err != nil { - return "", err - } - - // Add the restored network driver to the environment - r.envBuilder.SetDriverNetwork(r.driverNet) - - // Open a connection to the driver handle - ctx := driver.NewExecContext(r.taskDir, r.envBuilder.Build()) - handle, err := d.Open(ctx, snap.HandleID) - - // In the case it fails, we relaunch the task in the Run() method. - if err != nil { - r.logger.Printf("[ERR] client: failed to open handle to task %q for alloc %q: %v", - r.task.Name, r.alloc.ID, err) - return "", nil - } - - if pre06ScriptCheck(snap.Version, r.task.Driver, r.task.Services) { - restartReason = pre06ScriptCheckReason - } - - if err := r.registerServices(d, handle, r.driverNet); err != nil { - // Don't hard fail here as there's a chance this task - // registered with Consul properly when it initial - // started. - r.logger.Printf("[WARN] client: failed to register services and checks with consul for task %q in alloc %q: %v", - r.task.Name, r.alloc.ID, err) - } - - r.handleLock.Lock() - r.handle = handle - r.handleLock.Unlock() - - r.runningLock.Lock() - r.running = true - r.runningLock.Unlock() - } - return restartReason, nil -} - -// ver06 is used for checking for pre-0.6 script checks -var ver06 = version.Must(version.NewVersion("0.6.0dev")) - -// pre06ScriptCheckReason is the restart reason given when a pre-0.6 script -// check is found on an exec/java task. -const pre06ScriptCheckReason = "upgrading pre-0.6 script checks" - -// pre06ScriptCheck returns true if version is prior to 0.6.0dev, has a script -// check, and uses exec or java drivers. -func pre06ScriptCheck(ver, driver string, services []*structs.Service) bool { - if driver != "exec" && driver != "java" && driver != "mock_driver" { - // Only exec and java are affected - return false - } - v, err := version.NewVersion(ver) - if err != nil { - // Treat it as old - return true - } - if !v.LessThan(ver06) { - // >= 0.6.0dev - return false - } - for _, service := range services { - for _, check := range service.Checks { - if check.Type == "script" { - return true - } - } - } - return false -} - -// SaveState is used to snapshot our state -func (r *TaskRunner) SaveState() error { - r.destroyLock.Lock() - defer r.destroyLock.Unlock() - if r.destroy { - // Don't save state if already destroyed - return nil - } - - r.persistLock.Lock() - defer r.persistLock.Unlock() - snap := taskRunnerState{ - Version: r.config.Version.VersionNumber(), - ArtifactDownloaded: r.artifactsDownloaded, - TaskDirBuilt: r.taskDirBuilt, - PayloadRendered: r.payloadRendered, - CreatedResources: r.getCreatedResources(), - } - - r.handleLock.Lock() - if r.handle != nil { - snap.HandleID = r.handle.ID() - } - r.handleLock.Unlock() - - r.driverNetLock.Lock() - snap.DriverNetwork = r.driverNet.Copy() - r.driverNetLock.Unlock() - - // If nothing has changed avoid the write - h := snap.Hash() - if bytes.Equal(h, r.persistedHash) { - return nil - } - - // Serialize the object - var buf bytes.Buffer - if err := codec.NewEncoder(&buf, structs.MsgpackHandle).Encode(&snap); err != nil { - return fmt.Errorf("failed to serialize snapshot: %v", err) - } - - // Start the transaction. - //XXX Deprecated: see allocrunner - return nil - //return r.stateDB.Batch(func(tx *bolt.Tx) error { - // // Grab the task bucket - // taskBkt, err := state.GetTaskBucket(tx, r.alloc.ID, r.task.Name) - // if err != nil { - // return fmt.Errorf("failed to retrieve allocation bucket: %v", err) - // } - - // if err := state.PutData(taskBkt, taskRunnerStateAllKey, buf.Bytes()); err != nil { - // return fmt.Errorf("failed to write task_runner state: %v", err) - // } - - // // Store the hash that was persisted - // tx.OnCommit(func() { - // r.persistedHash = h - // }) - - // return nil - //}) -} - -// DestroyState is used to cleanup after ourselves -func (r *TaskRunner) DestroyState() error { - //r.persistLock.Lock() - //defer r.persistLock.Unlock() - - //return r.stateDB.Update(func(tx *bolt.Tx) error { - // if err := state.DeleteTaskBucket(tx, r.alloc.ID, r.task.Name); err != nil { - // return fmt.Errorf("failed to delete task bucket: %v", err) - // } - // return nil - //}) - //XXX Deprecated: see allocrunner - panic("deprecated") -} - -// setState is used to update the state of the task runner -func (r *TaskRunner) setState(state string, event *structs.TaskEvent, lazySync bool) { - event.PopulateEventDisplayMessage() - - // Persist our state to disk. - if err := r.SaveState(); err != nil { - r.logger.Printf("[ERR] client: failed to save state of Task Runner for task %q: %v", r.task.Name, err) - } - - // Indicate the task has been updated. - r.updater(r.task.Name, state, event, lazySync) -} - -// createDriver makes a driver for the task -func (r *TaskRunner) createDriver() (driver.Driver, error) { - // Create a task-specific event emitter callback to expose minimal - // state to drivers - eventEmitter := func(m string, args ...interface{}) { - msg := fmt.Sprintf(m, args...) - r.logger.Printf("[DEBUG] client: driver event for alloc %q: %s", r.alloc.ID, msg) - r.setState(structs.TaskStatePending, structs.NewTaskEvent(structs.TaskDriverMessage).SetDriverMessage(msg), false) - } - - driverCtx := driver.NewDriverContext(r.alloc.Job.Name, r.alloc.TaskGroup, r.task.Name, r.alloc.ID, r.config, r.config.Node, r.logger, eventEmitter) - d, err := driver.NewDriver(r.task.Driver, driverCtx) - if err != nil { - return nil, fmt.Errorf("failed to create driver '%s' for alloc %s: %v", - r.task.Driver, r.alloc.ID, err) - } - - return d, err -} - -// Run is a long running routine used to manage the task -func (r *TaskRunner) Run() { - defer close(r.waitCh) - r.logger.Printf("[DEBUG] client: starting task context for '%s' (alloc '%s')", - r.task.Name, r.alloc.ID) - - if err := r.validateTask(); err != nil { - r.setState( - structs.TaskStateDead, - structs.NewTaskEvent(structs.TaskFailedValidation).SetValidationError(err).SetFailsTask(), - false) - return - } - - // Create a temporary driver so that we can determine the FSIsolation - // required. run->startTask will create a new driver after environment - // has been setup (env vars, templates, artifacts, secrets, etc). - tmpDrv, err := r.createDriver() - if err != nil { - e := fmt.Errorf("failed to create driver of task %q for alloc %q: %v", r.task.Name, r.alloc.ID, err) - r.setState( - structs.TaskStateDead, - structs.NewTaskEvent(structs.TaskSetupFailure).SetSetupError(e).SetFailsTask(), - false) - return - } - - // Build base task directory structure regardless of FS isolation abilities. - // This needs to happen before we start the Vault manager and call prestart - // as both those can write to the task directories - if err := r.buildTaskDir(tmpDrv.FSIsolation()); err != nil { - e := fmt.Errorf("failed to build task directory for %q: %v", r.task.Name, err) - r.setState( - structs.TaskStateDead, - structs.NewTaskEvent(structs.TaskSetupFailure).SetSetupError(e).SetFailsTask(), - false) - return - } - - // If there is no Vault policy leave the static future created in - // NewTaskRunner - if r.task.Vault != nil { - // Start the go-routine to get a Vault token - r.vaultFuture.Clear() - go r.vaultManager(r.recoveredVaultToken) - } - - // Start the run loop - r.run() - - // Do any cleanup necessary - r.postrun() - - return -} - -// validateTask validates the fields of the task and returns an error if the -// task is invalid. -func (r *TaskRunner) validateTask() error { - var mErr multierror.Error - - // Validate the user. - unallowedUsers := r.config.ReadStringListToMapDefault("user.blacklist", config.DefaultUserBlacklist) - checkDrivers := r.config.ReadStringListToMapDefault("user.checked_drivers", config.DefaultUserCheckedDrivers) - if _, driverMatch := checkDrivers[r.task.Driver]; driverMatch { - if _, unallowed := unallowedUsers[r.task.User]; unallowed { - mErr.Errors = append(mErr.Errors, fmt.Errorf("running as user %q is disallowed", r.task.User)) - } - } - - //XXX Is this the right place for this? Seems like it could be done on - // the server when the job is submitted. Is this defense in depth? - // Validate the artifacts - for i, artifact := range r.task.Artifacts { - // Verify the artifact doesn't escape the task directory. - if err := artifact.Validate(); err != nil { - // If this error occurs there is potentially a server bug or - // malicious, server spoofing. - r.logger.Printf("[ERR] client: allocation %q, task %v, artifact %#v (%v) fails validation: %v", - r.alloc.ID, r.task.Name, artifact, i, err) - mErr.Errors = append(mErr.Errors, fmt.Errorf("artifact (%d) failed validation: %v", i, err)) - } - } - - // Validate the Service names - taskEnv := r.envBuilder.Build() - for i, service := range r.task.Services { - name := taskEnv.ReplaceEnv(service.Name) - if err := service.ValidateName(name); err != nil { - mErr.Errors = append(mErr.Errors, fmt.Errorf("service (%d) failed validation: %v", i, err)) - } - } - - if len(mErr.Errors) == 1 { - return mErr.Errors[0] - } - return mErr.ErrorOrNil() -} - -// tokenFuture stores the Vault token and allows consumers to block till a valid -// token exists -type tokenFuture struct { - waiting []chan struct{} - token string - set bool - m sync.Mutex -} - -// NewTokenFuture returns a new token future without any token set -func NewTokenFuture() *tokenFuture { - return &tokenFuture{} -} - -// Wait returns a channel that can be waited on. When this channel unblocks, a -// valid token will be available via the Get method -func (f *tokenFuture) Wait() <-chan struct{} { - f.m.Lock() - defer f.m.Unlock() - - c := make(chan struct{}) - if f.set { - close(c) - return c - } - - f.waiting = append(f.waiting, c) - return c -} - -// Set sets the token value and unblocks any caller of Wait -func (f *tokenFuture) Set(token string) *tokenFuture { - f.m.Lock() - defer f.m.Unlock() - - f.set = true - f.token = token - for _, w := range f.waiting { - close(w) - } - f.waiting = nil - return f -} - -// Clear clears the set vault token. -func (f *tokenFuture) Clear() *tokenFuture { - f.m.Lock() - defer f.m.Unlock() - - f.token = "" - f.set = false - return f -} - -// Get returns the set Vault token -func (f *tokenFuture) Get() string { - f.m.Lock() - defer f.m.Unlock() - return f.token -} - -// vaultManager should be called in a go-routine and manages the derivation, -// renewal and handling of errors with the Vault token. The optional parameter -// allows setting the initial Vault token. This is useful when the Vault token -// is recovered off disk. -func (r *TaskRunner) vaultManager(token string) { - // Helper for stopping token renewal - stopRenewal := func() { - if err := r.vaultClient.StopRenewToken(r.vaultFuture.Get()); err != nil { - r.logger.Printf("[WARN] client: failed to stop token renewal for task %v in alloc %q: %v", r.task.Name, r.alloc.ID, err) - } - } - - // updatedToken lets us store state between loops. If true, a new token - // has been retrieved and we need to apply the Vault change mode - var updatedToken bool - -OUTER: - for { - // Check if we should exit - select { - case <-r.waitCh: - stopRenewal() - return - default: - } - - // Clear the token - r.vaultFuture.Clear() - - // Check if there already is a token which can be the case for - // restoring the TaskRunner - if token == "" { - // Get a token - var exit bool - token, exit = r.deriveVaultToken() - if exit { - // Exit the manager - return - } - - // Write the token to disk - if err := r.writeToken(token); err != nil { - e := fmt.Errorf("failed to write Vault token to disk") - r.logger.Printf("[ERR] client: %v for task %v on alloc %q: %v", e, r.task.Name, r.alloc.ID, err) - r.Kill("vault", e.Error(), true) - return - } - } - - // Start the renewal process - renewCh, err := r.vaultClient.RenewToken(token, 30) - - // An error returned means the token is not being renewed - if err != nil { - r.logger.Printf("[ERR] client: failed to start renewal of Vault token for task %v on alloc %q: %v", r.task.Name, r.alloc.ID, err) - token = "" - goto OUTER - } - - // The Vault token is valid now, so set it - r.vaultFuture.Set(token) - - if updatedToken { - switch r.task.Vault.ChangeMode { - case structs.VaultChangeModeSignal: - s, err := signals.Parse(r.task.Vault.ChangeSignal) - if err != nil { - e := fmt.Errorf("failed to parse signal: %v", err) - r.logger.Printf("[ERR] client: %v", err) - r.Kill("vault", e.Error(), true) - return - } - - if err := r.Signal("vault", "new Vault token acquired", s); err != nil { - r.logger.Printf("[ERR] client: failed to send signal to task %v for alloc %q: %v", r.task.Name, r.alloc.ID, err) - r.Kill("vault", fmt.Sprintf("failed to send signal to task: %v", err), true) - return - } - case structs.VaultChangeModeRestart: - const noFailure = false - r.Restart("vault", "new Vault token acquired", noFailure) - case structs.VaultChangeModeNoop: - fallthrough - default: - r.logger.Printf("[ERR] client: Invalid Vault change mode: %q", r.task.Vault.ChangeMode) - } - - // We have handled it - updatedToken = false - - // Call the handler - r.updatedTokenHandler() - } - - // Start watching for renewal errors - select { - case err := <-renewCh: - // Clear the token - token = "" - r.logger.Printf("[ERR] client: failed to renew Vault token for task %v on alloc %q: %v", r.task.Name, r.alloc.ID, err) - stopRenewal() - - // Check if we have to do anything - if r.task.Vault.ChangeMode != structs.VaultChangeModeNoop { - updatedToken = true - } - case <-r.waitCh: - stopRenewal() - return - } - } -} - -// deriveVaultToken derives the Vault token using exponential backoffs. It -// returns the Vault token and whether the manager should exit. -func (r *TaskRunner) deriveVaultToken() (token string, exit bool) { - attempts := 0 - for { - tokens, err := r.vaultClient.DeriveToken(r.alloc, []string{r.task.Name}) - if err == nil { - return tokens[r.task.Name], false - } - - // Check if this is a server side error - if structs.IsServerSide(err) { - r.logger.Printf("[ERR] client: failed to derive Vault token for task %v on alloc %q: %v", - r.task.Name, r.alloc.ID, err) - r.Kill("vault", fmt.Sprintf("server error deriving vault token: %v", err), true) - return "", true - } - // Check if we can't recover from the error - if !structs.IsRecoverable(err) { - r.logger.Printf("[ERR] client: failed to derive Vault token for task %v on alloc %q: %v", - r.task.Name, r.alloc.ID, err) - r.Kill("vault", fmt.Sprintf("failed to derive token: %v", err), true) - return "", true - } - - // Handle the retry case - backoff := (1 << (2 * uint64(attempts))) * vaultBackoffBaseline - if backoff > vaultBackoffLimit { - backoff = vaultBackoffLimit - } - r.logger.Printf("[ERR] client: failed to derive Vault token for task %v on alloc %q: %v; retrying in %v", - r.task.Name, r.alloc.ID, err, backoff) - - attempts++ - - // Wait till retrying - select { - case <-r.waitCh: - return "", true - case <-time.After(backoff): - } - } -} - -// writeToken writes the given token to disk -func (r *TaskRunner) writeToken(token string) error { - tokenPath := filepath.Join(r.taskDir.SecretsDir, vaultTokenFile) - if err := ioutil.WriteFile(tokenPath, []byte(token), 0777); err != nil { - return fmt.Errorf("failed to save Vault tokens to secret dir for task %q in alloc %q: %v", r.task.Name, r.alloc.ID, err) - } - - return nil -} - -// updatedTokenHandler is called when a new Vault token is retrieved. Things -// that rely on the token should be updated here. -func (r *TaskRunner) updatedTokenHandler() { - - // Update the tasks environment - r.envBuilder.SetVaultToken(r.vaultFuture.Get(), r.task.Vault.Env) - - if r.templateManager != nil { - r.templateManager.Stop() - - // Create a new templateManager - var err error - r.templateManager, err = NewTaskTemplateManager(&TaskTemplateManagerConfig{ - Hooks: r, - Templates: r.task.Templates, - ClientConfig: r.config, - VaultToken: r.vaultFuture.Get(), - TaskDir: r.taskDir.Dir, - EnvBuilder: r.envBuilder, - MaxTemplateEventRate: DefaultMaxTemplateEventRate, - }) - - if err != nil { - err := fmt.Errorf("failed to build task's template manager: %v", err) - r.setState(structs.TaskStateDead, - structs.NewTaskEvent(structs.TaskSetupFailure).SetSetupError(err).SetFailsTask(), - false) - r.logger.Printf("[ERR] client: alloc %q, task %q %v", r.alloc.ID, r.task.Name, err) - r.Kill("vault", err.Error(), true) - return - } - } -} - -// prestart handles life-cycle tasks that occur before the task has started. -// Since it's run asynchronously with the main Run() loop the alloc & task are -// passed in to avoid racing with updates. -func (r *TaskRunner) prestart(alloc *structs.Allocation, task *structs.Task, resultCh chan bool) { - if task.Vault != nil { - // Wait for the token - r.logger.Printf("[DEBUG] client: waiting for Vault token for task %v in alloc %q", task.Name, alloc.ID) - tokenCh := r.vaultFuture.Wait() - select { - case <-tokenCh: - case <-r.waitCh: - resultCh <- false - return - } - r.logger.Printf("[DEBUG] client: retrieved Vault token for task %v in alloc %q", task.Name, alloc.ID) - r.envBuilder.SetVaultToken(r.vaultFuture.Get(), task.Vault.Env) - } - - // If the job is a dispatch job and there is a payload write it to disk - requirePayload := len(alloc.Job.Payload) != 0 && - (r.task.DispatchPayload != nil && r.task.DispatchPayload.File != "") - if !r.payloadRendered && requirePayload { - renderTo := filepath.Join(r.taskDir.LocalDir, task.DispatchPayload.File) - decoded, err := snappy.Decode(nil, alloc.Job.Payload) - if err != nil { - r.setState( - structs.TaskStateDead, - structs.NewTaskEvent(structs.TaskSetupFailure).SetSetupError(err).SetFailsTask(), - false) - resultCh <- false - return - } - - if err := os.MkdirAll(filepath.Dir(renderTo), 07777); err != nil { - r.setState( - structs.TaskStateDead, - structs.NewTaskEvent(structs.TaskSetupFailure).SetSetupError(err).SetFailsTask(), - false) - resultCh <- false - return - } - - if err := ioutil.WriteFile(renderTo, decoded, 0777); err != nil { - r.setState( - structs.TaskStateDead, - structs.NewTaskEvent(structs.TaskSetupFailure).SetSetupError(err).SetFailsTask(), - false) - resultCh <- false - return - } - - r.payloadRendered = true - } - - for { - r.persistLock.Lock() - downloaded := r.artifactsDownloaded - r.persistLock.Unlock() - - // Download the task's artifacts - if !downloaded && len(task.Artifacts) > 0 { - r.setState(structs.TaskStatePending, structs.NewTaskEvent(structs.TaskDownloadingArtifacts), false) - taskEnv := r.envBuilder.Build() - for _, artifact := range task.Artifacts { - if err := getter.GetArtifact(taskEnv, artifact, r.taskDir.Dir); err != nil { - wrapped := fmt.Errorf("failed to download artifact %q: %v", artifact.GetterSource, err) - r.logger.Printf("[DEBUG] client: %v", wrapped) - r.setState(structs.TaskStatePending, - structs.NewTaskEvent(structs.TaskArtifactDownloadFailed).SetDownloadError(wrapped), false) - r.restartTracker.SetStartError(structs.WrapRecoverable(wrapped.Error(), err)) - goto RESTART - } - } - - r.persistLock.Lock() - r.artifactsDownloaded = true - r.persistLock.Unlock() - } - - // We don't have to wait for any template - if len(task.Templates) == 0 { - // Send the start signal - select { - case r.startCh <- struct{}{}: - default: - } - - resultCh <- true - return - } - - // Build the template manager - if r.templateManager == nil { - var err error - r.templateManager, err = NewTaskTemplateManager(&TaskTemplateManagerConfig{ - Hooks: r, - Templates: r.task.Templates, - ClientConfig: r.config, - VaultToken: r.vaultFuture.Get(), - TaskDir: r.taskDir.Dir, - EnvBuilder: r.envBuilder, - MaxTemplateEventRate: DefaultMaxTemplateEventRate, - }) - if err != nil { - err := fmt.Errorf("failed to build task's template manager: %v", err) - r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskSetupFailure).SetSetupError(err).SetFailsTask(), false) - r.logger.Printf("[ERR] client: alloc %q, task %q %v", alloc.ID, task.Name, err) - resultCh <- false - return - } - } - - // Block for consul-template - // TODO Hooks should register themselves as blocking and then we can - // periodically enumerate what we are still blocked on - select { - case <-r.unblockCh: - // Send the start signal - select { - case r.startCh <- struct{}{}: - default: - } - - resultCh <- true - return - case <-r.waitCh: - // The run loop has exited so exit too - resultCh <- false - return - } - - RESTART: - restart := r.shouldRestart() - if !restart { - resultCh <- false - return - } - } -} - -// postrun is used to do any cleanup that is necessary after exiting the runloop -func (r *TaskRunner) postrun() { - // Stop the template manager - if r.templateManager != nil { - r.templateManager.Stop() - } -} - -// run is the main run loop that handles starting the application, destroying -// it, restarts and signals. -func (r *TaskRunner) run() { - // Predeclare things so we can jump to the RESTART - var stopCollection chan struct{} - var handleWaitCh chan *dstructs.WaitResult - - // If we already have a handle, populate the stopCollection and handleWaitCh - // to fix the invariant that it exists. - handleEmpty := r.getHandle() == nil - - if !handleEmpty { - stopCollection = make(chan struct{}) - go r.collectResourceUsageStats(stopCollection) - handleWaitCh = r.handle.WaitCh() - } - - for { - // Do the prestart activities - prestartResultCh := make(chan bool, 1) - go r.prestart(r.alloc, r.task, prestartResultCh) - - WAIT: - for { - select { - case success := <-prestartResultCh: - if !success { - r.cleanup() - r.setState(structs.TaskStateDead, nil, false) - return - } - case <-r.startCh: - // Start the task if not yet started or it is being forced. This logic - // is necessary because in the case of a restore the handle already - // exists. - handleEmpty := r.getHandle() == nil - if handleEmpty { - startErr := r.startTask() - r.restartTracker.SetStartError(startErr) - if startErr != nil { - r.setState("", structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(startErr), true) - goto RESTART - } - - // Mark the task as started - r.setState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted), false) - r.runningLock.Lock() - r.running = true - r.runningLock.Unlock() - - if stopCollection == nil { - stopCollection = make(chan struct{}) - go r.collectResourceUsageStats(stopCollection) - } - - handleWaitCh = r.handle.WaitCh() - } - - case waitRes := <-handleWaitCh: - if waitRes == nil { - panic("nil wait") - } - - r.runningLock.Lock() - r.running = false - r.runningLock.Unlock() - - // Stop collection of the task's resource usage - close(stopCollection) - - // Log whether the task was successful or not. - r.restartTracker.SetWaitResult(waitRes) - r.setState("", r.waitErrorToEvent(waitRes), true) - if !waitRes.Successful() { - r.logger.Printf("[INFO] client: task %q for alloc %q failed: %v", r.task.Name, r.alloc.ID, waitRes) - } else { - r.logger.Printf("[INFO] client: task %q for alloc %q completed successfully", r.task.Name, r.alloc.ID) - } - - break WAIT - case update := <-r.updateCh: - if err := r.handleUpdate(update); err != nil { - r.logger.Printf("[ERR] client: update to task %q failed: %v", r.task.Name, err) - } - - case se := <-r.signalCh: - r.runningLock.Lock() - running := r.running - r.runningLock.Unlock() - common := fmt.Sprintf("signal %v to task %v for alloc %q", se.s, r.task.Name, r.alloc.ID) - if !running { - // Send no error - r.logger.Printf("[DEBUG] client: skipping %s", common) - se.result <- nil - continue - } - - r.logger.Printf("[DEBUG] client: sending %s", common) - r.setState(structs.TaskStateRunning, se.e, false) - - res := r.handle.Signal(se.s) - se.result <- res - - case restartEvent := <-r.restartCh: - r.runningLock.Lock() - running := r.running - r.runningLock.Unlock() - common := fmt.Sprintf("task %v for alloc %q", r.task.Name, r.alloc.ID) - if !running { - r.logger.Printf("[DEBUG] client: skipping restart of %v: task isn't running", common) - continue - } - - r.logger.Printf("[DEBUG] client: restarting %s: %v", common, restartEvent.taskEvent.RestartReason) - r.setState(structs.TaskStateRunning, restartEvent.taskEvent, false) - r.killTask(nil) - - close(stopCollection) - - if handleWaitCh != nil { - <-handleWaitCh - } - - r.restartTracker.SetRestartTriggered(restartEvent.failure) - break WAIT - - case <-r.destroyCh: - r.runningLock.Lock() - running := r.running - r.runningLock.Unlock() - if !running { - r.cleanup() - r.setState(structs.TaskStateDead, r.destroyEvent, false) - return - } - - // Remove from consul before killing the task so that traffic - // can be rerouted - r.removeServices() - - // Delay actually killing the task if configured. See #244 - if r.task.ShutdownDelay > 0 { - r.logger.Printf("[DEBUG] client: delaying shutdown of alloc %q task %q for %q", - r.alloc.ID, r.task.Name, r.task.ShutdownDelay) - <-time.After(r.task.ShutdownDelay) - } - - // Store the task event that provides context on the task - // destroy. The Killed event is set from the alloc_runner and - // doesn't add detail - var killEvent *structs.TaskEvent - if r.destroyEvent.Type != structs.TaskKilled { - if r.destroyEvent.Type == structs.TaskKilling { - killEvent = r.destroyEvent - } else { - r.setState(structs.TaskStateRunning, r.destroyEvent, false) - } - } - - r.killTask(killEvent) - close(stopCollection) - - // Wait for handler to exit before calling cleanup - <-handleWaitCh - r.cleanup() - - r.setState(structs.TaskStateDead, nil, false) - return - } - } - - RESTART: - // shouldRestart will block if the task should restart after a delay. - restart := r.shouldRestart() - if !restart { - r.cleanup() - r.setState(structs.TaskStateDead, nil, false) - return - } - - // Clear the handle so a new driver will be created. - r.handleLock.Lock() - r.handle = nil - handleWaitCh = nil - stopCollection = nil - r.handleLock.Unlock() - } -} - -// cleanup removes Consul entries and calls Driver.Cleanup when a task is -// stopping. Errors are logged. -func (r *TaskRunner) cleanup() { - // Remove from Consul - r.removeServices() - - drv, err := r.createDriver() - if err != nil { - r.logger.Printf("[ERR] client: error creating driver to cleanup resources: %v", err) - return - } - - res := r.getCreatedResources() - - ctx := driver.NewExecContext(r.taskDir, r.envBuilder.Build()) - attempts := 1 - var cleanupErr error - for retry := true; retry; attempts++ { - cleanupErr = drv.Cleanup(ctx, res) - retry = structs.IsRecoverable(cleanupErr) - - // Copy current createdResources state in case SaveState is - // called between retries - r.setCreatedResources(res) - - // Retry 3 times with sleeps between - if !retry || attempts > 3 { - break - } - time.Sleep(time.Duration(attempts) * time.Second) - } - - if cleanupErr != nil { - r.logger.Printf("[ERR] client: error cleaning up resources for task %q after %d attempts: %v", r.task.Name, attempts, cleanupErr) - } - return -} - -// shouldRestart returns if the task should restart. If the return value is -// true, the task's restart policy has already been considered and any wait time -// between restarts has been applied. -func (r *TaskRunner) shouldRestart() bool { - state, when := r.restartTracker.GetState() - reason := r.restartTracker.GetReason() - switch state { - case structs.TaskNotRestarting, structs.TaskTerminated: - r.logger.Printf("[INFO] client: Not restarting task: %v for alloc: %v ", r.task.Name, r.alloc.ID) - if state == structs.TaskNotRestarting { - r.setState(structs.TaskStateDead, - structs.NewTaskEvent(structs.TaskNotRestarting). - SetRestartReason(reason).SetFailsTask(), - false) - } - return false - case structs.TaskRestarting: - r.logger.Printf("[INFO] client: Restarting task %q for alloc %q in %v", r.task.Name, r.alloc.ID, when) - r.setState(structs.TaskStatePending, - structs.NewTaskEvent(structs.TaskRestarting). - SetRestartDelay(when). - SetRestartReason(reason), - false) - default: - r.logger.Printf("[ERR] client: restart tracker returned unknown state: %q", state) - return false - } - - // Unregister from Consul while waiting to restart. - r.removeServices() - - // Sleep but watch for destroy events. - select { - case <-time.After(when): - case <-r.destroyCh: - } - - // Destroyed while we were waiting to restart, so abort. - r.destroyLock.Lock() - destroyed := r.destroy - r.destroyLock.Unlock() - if destroyed { - r.logger.Printf("[DEBUG] client: Not restarting task: %v because it has been destroyed", r.task.Name) - r.setState(structs.TaskStateDead, r.destroyEvent, false) - return false - } - - return true -} - -// killTask kills the running task. A killing event can optionally be passed and -// this event is used to mark the task as being killed. It provides a means to -// store extra information. -func (r *TaskRunner) killTask(killingEvent *structs.TaskEvent) { - r.runningLock.Lock() - running := r.running - r.runningLock.Unlock() - if !running { - return - } - - // Get the kill timeout - timeout := driver.GetKillTimeout(r.task.KillTimeout, r.config.MaxKillTimeout) - - // Build the event - var event *structs.TaskEvent - if killingEvent != nil { - event = killingEvent - event.Type = structs.TaskKilling - } else { - event = structs.NewTaskEvent(structs.TaskKilling) - } - event.SetKillTimeout(timeout) - - // Mark that we received the kill event - r.setState(structs.TaskStateRunning, event, false) - - handle := r.getHandle() - - // Kill the task using an exponential backoff in-case of failures. - destroySuccess, err := r.handleDestroy(handle) - if !destroySuccess { - // We couldn't successfully destroy the resource created. - r.logger.Printf("[ERR] client: failed to kill task %q. Resources may have been leaked: %v", r.task.Name, err) - } - - r.runningLock.Lock() - r.running = false - r.runningLock.Unlock() - - // Store that the task has been destroyed and any associated error. - r.setState("", structs.NewTaskEvent(structs.TaskKilled).SetKillError(err), true) -} - -// startTask creates the driver, task dir, and starts the task. -func (r *TaskRunner) startTask() error { - // Create a driver - drv, err := r.createDriver() - if err != nil { - return fmt.Errorf("failed to create driver of task %q for alloc %q: %v", - r.task.Name, r.alloc.ID, err) - } - - // Run prestart - ctx := driver.NewExecContext(r.taskDir, r.envBuilder.Build()) - presp, err := drv.Prestart(ctx, r.task) - - // Merge newly created resources into previously created resources - if presp != nil { - r.createdResourcesLock.Lock() - r.createdResources.Merge(presp.CreatedResources) - r.createdResourcesLock.Unlock() - - // Set any network configuration returned by the driver - r.envBuilder.SetDriverNetwork(presp.Network) - } - - if err != nil { - wrapped := fmt.Sprintf("failed to initialize task %q for alloc %q: %v", - r.task.Name, r.alloc.ID, err) - r.logger.Printf("[WARN] client: error from prestart: %s", wrapped) - return structs.WrapRecoverable(wrapped, err) - } - - // Create a new context for Start since the environment may have been updated. - ctx = driver.NewExecContext(r.taskDir, r.envBuilder.Build()) - - // Start the job - sresp, err := drv.Start(ctx, r.task) - if err != nil { - wrapped := fmt.Sprintf("failed to start task %q for alloc %q: %v", - r.task.Name, r.alloc.ID, err) - r.logger.Printf("[WARN] client: %s", wrapped) - return structs.WrapRecoverable(wrapped, err) - - } - - // Log driver network information - if sresp.Network != nil && sresp.Network.IP != "" { - if sresp.Network.AutoAdvertise { - r.logger.Printf("[INFO] client: alloc %s task %s auto-advertising detected IP %s", - r.alloc.ID, r.task.Name, sresp.Network.IP) - } else { - r.logger.Printf("[TRACE] client: alloc %s task %s detected IP %s but not auto-advertising", - r.alloc.ID, r.task.Name, sresp.Network.IP) - } - } - - if sresp.Network == nil || sresp.Network.IP == "" { - r.logger.Printf("[TRACE] client: alloc %s task %s could not detect a driver IP", r.alloc.ID, r.task.Name) - } - - // Update environment with the network defined by the driver's Start method. - r.envBuilder.SetDriverNetwork(sresp.Network) - - if err := r.registerServices(drv, sresp.Handle, sresp.Network); err != nil { - // All IO is done asynchronously, so errors from registering - // services are hard failures. - r.logger.Printf("[ERR] client: failed to register services and checks for task %q alloc %q: %v", r.task.Name, r.alloc.ID, err) - - // Kill the started task - if destroyed, err := r.handleDestroy(sresp.Handle); !destroyed { - r.logger.Printf("[ERR] client: failed to kill task %q alloc %q. Resources may be leaked: %v", - r.task.Name, r.alloc.ID, err) - } - return structs.NewRecoverableError(err, false) - } - - r.handleLock.Lock() - r.handle = sresp.Handle - r.handleLock.Unlock() - - // Need to persist the driver network between restarts - r.driverNetLock.Lock() - r.driverNet = sresp.Network - r.driverNetLock.Unlock() - - return nil -} - -// registerServices and checks with Consul. -func (r *TaskRunner) registerServices(d driver.Driver, h driver.DriverHandle, n *cstructs.DriverNetwork) error { - //var exec driver.ScriptExecutor - //if d.Abilities().Exec { - // // Allow set the script executor if the driver supports it - // exec = h - //} - //interpolatedTask := interpolateServices(r.envBuilder.Build(), r.task) - //taskServices := consul.NewTaskServices(r.alloc, interpolatedTask, r, exec, n) - panic("XXX broken during transition to allocrunner") - return r.consul.RegisterTask(nil) -} - -// interpolateServices interpolates tags in a service and checks with values from the -// task's environment. -func interpolateServices(taskEnv *taskenv.TaskEnv, task *structs.Task) *structs.Task { - taskCopy := task.Copy() - for _, service := range taskCopy.Services { - for _, check := range service.Checks { - check.Name = taskEnv.ReplaceEnv(check.Name) - check.Type = taskEnv.ReplaceEnv(check.Type) - check.Command = taskEnv.ReplaceEnv(check.Command) - check.Args = taskEnv.ParseAndReplace(check.Args) - check.Path = taskEnv.ReplaceEnv(check.Path) - check.Protocol = taskEnv.ReplaceEnv(check.Protocol) - check.PortLabel = taskEnv.ReplaceEnv(check.PortLabel) - check.InitialStatus = taskEnv.ReplaceEnv(check.InitialStatus) - check.Method = taskEnv.ReplaceEnv(check.Method) - check.GRPCService = taskEnv.ReplaceEnv(check.GRPCService) - if len(check.Header) > 0 { - header := make(map[string][]string, len(check.Header)) - for k, vs := range check.Header { - newVals := make([]string, len(vs)) - for i, v := range vs { - newVals[i] = taskEnv.ReplaceEnv(v) - } - header[taskEnv.ReplaceEnv(k)] = newVals - } - check.Header = header - } - } - service.Name = taskEnv.ReplaceEnv(service.Name) - service.PortLabel = taskEnv.ReplaceEnv(service.PortLabel) - service.Tags = taskEnv.ParseAndReplace(service.Tags) - service.CanaryTags = taskEnv.ParseAndReplace(service.CanaryTags) - } - return taskCopy -} - -// buildTaskDir creates the task directory before driver.Prestart. It is safe -// to call multiple times as its state is persisted. -func (r *TaskRunner) buildTaskDir(fsi cstructs.FSIsolation) error { - r.persistLock.Lock() - built := r.taskDirBuilt - r.persistLock.Unlock() - - // We do not set the state again since this only occurs during restoration - // and the task dir is already built. The reason we call Build again is to - // ensure that the task dir invariants are still held. - if !built { - r.setState(structs.TaskStatePending, - structs.NewTaskEvent(structs.TaskSetup).SetMessage(structs.TaskBuildingTaskDir), - false) - } - - chroot := config.DefaultChrootEnv - if len(r.config.ChrootEnv) > 0 { - chroot = r.config.ChrootEnv - } - if err := r.taskDir.Build(built, chroot, fsi); err != nil { - return err - } - - // Mark task dir as successfully built - r.persistLock.Lock() - r.taskDirBuilt = true - r.persistLock.Unlock() - - // Set path and host related env vars - driver.SetEnvvars(r.envBuilder, fsi, r.taskDir, r.config) - return nil -} - -// collectResourceUsageStats starts collecting resource usage stats of a Task. -// Collection ends when the passed channel is closed -func (r *TaskRunner) collectResourceUsageStats(stopCollection <-chan struct{}) { - // start collecting the stats right away and then start collecting every - // collection interval - next := time.NewTimer(0) - defer next.Stop() - for { - select { - case <-next.C: - next.Reset(r.config.StatsCollectionInterval) - handle := r.getHandle() - if handle == nil { - continue - } - ru, err := handle.Stats() - - if err != nil { - // Check if the driver doesn't implement stats - if err.Error() == cstructs.DriverStatsNotImplemented.Error() { - r.logger.Printf("[DEBUG] client: driver for task %q in allocation %q doesn't support stats", r.task.Name, r.alloc.ID) - return - } - - // We do not log when the plugin is shutdown as this is simply a - // race between the stopCollection channel being closed and calling - // Stats on the handle. - if !strings.Contains(err.Error(), "connection is shut down") { - r.logger.Printf("[DEBUG] client: error fetching stats of task %v: %v", r.task.Name, err) - } - continue - } - - r.resourceUsageLock.Lock() - r.resourceUsage = ru - r.resourceUsageLock.Unlock() - if ru != nil { - r.emitStats(ru) - } - case <-stopCollection: - return - } - } -} - -// LatestResourceUsage returns the last resource utilization datapoint collected -func (r *TaskRunner) LatestResourceUsage() *cstructs.TaskResourceUsage { - r.resourceUsageLock.RLock() - defer r.resourceUsageLock.RUnlock() - r.runningLock.Lock() - defer r.runningLock.Unlock() - - // If the task is not running there can be no latest resource - if !r.running { - return nil - } - - return r.resourceUsage -} - -// handleUpdate takes an updated allocation and updates internal state to -// reflect the new config for the task. -func (r *TaskRunner) handleUpdate(update *structs.Allocation) error { - // Extract the task group from the alloc. - tg := update.Job.LookupTaskGroup(update.TaskGroup) - if tg == nil { - return fmt.Errorf("alloc '%s' missing task group '%s'", update.ID, update.TaskGroup) - } - - // Extract the task. - var updatedTask *structs.Task - for _, t := range tg.Tasks { - if t.Name == r.task.Name { - updatedTask = t.Copy() - break - } - } - if updatedTask == nil { - return fmt.Errorf("task group %q doesn't contain task %q", tg.Name, r.task.Name) - } - - // Merge in the task resources - updatedTask.Resources = update.TaskResources[updatedTask.Name] - - // Interpolate the old task with the old env before updating the env as - // updating services in Consul need both the old and new interpolations - // to find differences. - oldInterpolatedTask := interpolateServices(r.envBuilder.Build(), r.task) - - // Now it's safe to update the environment - r.envBuilder.UpdateTask(update, updatedTask) - - var mErr multierror.Error - r.handleLock.Lock() - if r.handle != nil { - drv, err := r.createDriver() - if err != nil { - // Something has really gone wrong; don't continue - r.handleLock.Unlock() - return fmt.Errorf("error accessing driver when updating task %q: %v", r.task.Name, err) - } - - // Update will update resources and store the new kill timeout. - if err := r.handle.Update(updatedTask); err != nil { - mErr.Errors = append(mErr.Errors, fmt.Errorf("updating task resources failed: %v", err)) - } - - // Update services in Consul - newInterpolatedTask := interpolateServices(r.envBuilder.Build(), updatedTask) - if err := r.updateServices(drv, r.handle, r.alloc, oldInterpolatedTask, update, newInterpolatedTask); err != nil { - mErr.Errors = append(mErr.Errors, fmt.Errorf("error updating services and checks in Consul: %v", err)) - } - } - r.handleLock.Unlock() - - // Update the restart policy. - if r.restartTracker != nil { - r.restartTracker.SetPolicy(tg.RestartPolicy) - } - - // Store the updated alloc. - r.alloc = update - r.task = updatedTask - return mErr.ErrorOrNil() -} - -// updateServices and checks with Consul. Tasks must be interpolated! -func (r *TaskRunner) updateServices(d driver.Driver, h driver.ScriptExecutor, - oldAlloc *structs.Allocation, oldTask *structs.Task, - newAlloc *structs.Allocation, newTask *structs.Task) error { - - //var exec driver.ScriptExecutor - //if d.Abilities().Exec { - // // Allow set the script executor if the driver supports it - // exec = h - //} - //r.driverNetLock.Lock() - //net := r.driverNet.Copy() - //r.driverNetLock.Unlock() - //oldTaskServices := consul.NewTaskServices(oldAlloc, oldTask, r, exec, net) - //newTaskServices := consul.NewTaskServices(newAlloc, newTask, r, exec, net) - panic("XXX broken during transition to allocrunner") - //return r.consul.UpdateTask(oldTaskServices, newTaskServices) - return r.consul.UpdateTask(nil, nil) -} - -// removeServices and checks from Consul. Handles interpolation and deleting -// Canary=true and Canary=false versions in case Canary=false is set at the -// same time as the alloc is stopped. -func (r *TaskRunner) removeServices() { - panic("XXX broken during transition to allocrunner") - //interpTask := interpolateServices(r.envBuilder.Build(), r.task) - //taskServices := consul.NewTaskServices(r.alloc, interpTask, r, nil, nil) - //r.consul.RemoveTask(taskServices) - - // Flip Canary and remove again in case canary is getting flipped at - // the same time as the alloc is being destroyed - //taskServices.Canary = !taskServices.Canary - //r.consul.RemoveTask(taskServices) -} - -// handleDestroy kills the task handle. In the case that killing fails, -// handleDestroy will retry with an exponential backoff and will give up at a -// given limit. It returns whether the task was destroyed and the error -// associated with the last kill attempt. -func (r *TaskRunner) handleDestroy(handle driver.DriverHandle) (destroyed bool, err error) { - // Cap the number of times we attempt to kill the task. - for i := 0; i < killFailureLimit; i++ { - if err = handle.Kill(); err != nil { - // Calculate the new backoff - backoff := (1 << (2 * uint64(i))) * killBackoffBaseline - if backoff > killBackoffLimit { - backoff = killBackoffLimit - } - - r.logger.Printf("[ERR] client: failed to kill task '%s' for alloc %q. Retrying in %v: %v", - r.task.Name, r.alloc.ID, backoff, err) - time.Sleep(backoff) - } else { - // Kill was successful - return true, nil - } - } - return -} - -// Restart will restart the task. -func (r *TaskRunner) Restart(source, reason string, failure bool) { - reasonStr := fmt.Sprintf("%s: %s", source, reason) - event := newTaskRestartEvent(reasonStr, failure) - - select { - case r.restartCh <- event: - case <-r.waitCh: - } -} - -// Signal will send a signal to the task -func (r *TaskRunner) Signal(source, reason string, s os.Signal) error { - - reasonStr := fmt.Sprintf("%s: %s", source, reason) - event := structs.NewTaskEvent(structs.TaskSignaling).SetTaskSignal(s).SetTaskSignalReason(reasonStr) - - resCh := make(chan error) - se := SignalEvent{ - s: s, - e: event, - result: resCh, - } - - select { - case r.signalCh <- se: - case <-r.waitCh: - } - - return <-resCh -} - -// Kill will kill a task and store the error, no longer restarting the task. If -// fail is set, the task is marked as having failed. -func (r *TaskRunner) Kill(source, reason string, fail bool) { - reasonStr := fmt.Sprintf("%s: %s", source, reason) - event := structs.NewTaskEvent(structs.TaskKilling).SetKillReason(reasonStr) - if fail { - event.SetFailsTask() - } - - r.logger.Printf("[DEBUG] client: killing task %v for alloc %q: %v", r.task.Name, r.alloc.ID, reasonStr) - r.Destroy(event) -} - -func (r *TaskRunner) EmitEvent(source, message string) { - event := structs.NewTaskEvent(source). - SetMessage(message) - r.setState("", event, false) - r.logger.Printf("[DEBUG] client: event from %q for task %q in alloc %q: %v", - source, r.task.Name, r.alloc.ID, message) -} - -// UnblockStart unblocks the starting of the task. It currently assumes only -// consul-template will unblock -func (r *TaskRunner) UnblockStart(source string) { - r.unblockLock.Lock() - defer r.unblockLock.Unlock() - if r.unblocked { - return - } - - r.logger.Printf("[DEBUG] client: unblocking task %v for alloc %q: %v", r.task.Name, r.alloc.ID, source) - r.unblocked = true - close(r.unblockCh) -} - -// Helper function for converting a WaitResult into a TaskTerminated event. -func (r *TaskRunner) waitErrorToEvent(res *dstructs.WaitResult) *structs.TaskEvent { - return structs.NewTaskEvent(structs.TaskTerminated). - SetExitCode(res.ExitCode). - SetSignal(res.Signal). - SetExitMessage(res.Err) -} - -// Update is used to update the task of the context -func (r *TaskRunner) Update(update *structs.Allocation) { - select { - case r.updateCh <- update: - default: - r.logger.Printf("[ERR] client: dropping task update '%s' (alloc '%s')", - r.task.Name, r.alloc.ID) - } -} - -// Destroy is used to indicate that the task context should be destroyed. The -// event parameter provides a context for the destroy. -func (r *TaskRunner) Destroy(event *structs.TaskEvent) { - r.destroyLock.Lock() - defer r.destroyLock.Unlock() - - if r.destroy { - return - } - r.destroy = true - r.destroyEvent = event - close(r.destroyCh) -} - -// getCreatedResources returns the resources created by drivers. It will never -// return nil. -func (r *TaskRunner) getCreatedResources() *driver.CreatedResources { - r.createdResourcesLock.Lock() - if r.createdResources == nil { - r.createdResources = driver.NewCreatedResources() - } - cr := r.createdResources.Copy() - r.createdResourcesLock.Unlock() - - return cr -} - -// setCreatedResources updates the resources created by drivers. If passed nil -// it will set createdResources to an initialized struct. -func (r *TaskRunner) setCreatedResources(cr *driver.CreatedResources) { - if cr == nil { - cr = driver.NewCreatedResources() - } - r.createdResourcesLock.Lock() - r.createdResources = cr.Copy() - r.createdResourcesLock.Unlock() -} - -func (r *TaskRunner) setGaugeForMemory(ru *cstructs.TaskResourceUsage) { - if !r.config.DisableTaggedMetrics { - metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "rss"}, - float32(ru.ResourceUsage.MemoryStats.RSS), r.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "rss"}, - float32(ru.ResourceUsage.MemoryStats.RSS), r.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "cache"}, - float32(ru.ResourceUsage.MemoryStats.Cache), r.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "swap"}, - float32(ru.ResourceUsage.MemoryStats.Swap), r.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "max_usage"}, - float32(ru.ResourceUsage.MemoryStats.MaxUsage), r.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "kernel_usage"}, - float32(ru.ResourceUsage.MemoryStats.KernelUsage), r.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "kernel_max_usage"}, - float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage), r.baseLabels) - } - - if r.config.BackwardsCompatibleMetrics { - metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "rss"}, float32(ru.ResourceUsage.MemoryStats.RSS)) - metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "cache"}, float32(ru.ResourceUsage.MemoryStats.Cache)) - metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "swap"}, float32(ru.ResourceUsage.MemoryStats.Swap)) - metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "max_usage"}, float32(ru.ResourceUsage.MemoryStats.MaxUsage)) - metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "kernel_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelUsage)) - metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "kernel_max_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage)) - } -} - -func (r *TaskRunner) setGaugeForCPU(ru *cstructs.TaskResourceUsage) { - if !r.config.DisableTaggedMetrics { - metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "total_percent"}, - float32(ru.ResourceUsage.CpuStats.Percent), r.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "system"}, - float32(ru.ResourceUsage.CpuStats.SystemMode), r.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "user"}, - float32(ru.ResourceUsage.CpuStats.UserMode), r.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "throttled_time"}, - float32(ru.ResourceUsage.CpuStats.ThrottledTime), r.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "throttled_periods"}, - float32(ru.ResourceUsage.CpuStats.ThrottledPeriods), r.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "total_ticks"}, - float32(ru.ResourceUsage.CpuStats.TotalTicks), r.baseLabels) - } - - if r.config.BackwardsCompatibleMetrics { - metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "total_percent"}, float32(ru.ResourceUsage.CpuStats.Percent)) - metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "system"}, float32(ru.ResourceUsage.CpuStats.SystemMode)) - metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "user"}, float32(ru.ResourceUsage.CpuStats.UserMode)) - metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "throttled_time"}, float32(ru.ResourceUsage.CpuStats.ThrottledTime)) - metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "throttled_periods"}, float32(ru.ResourceUsage.CpuStats.ThrottledPeriods)) - metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "total_ticks"}, float32(ru.ResourceUsage.CpuStats.TotalTicks)) - } -} - -// emitStats emits resource usage stats of tasks to remote metrics collector -// sinks -func (r *TaskRunner) emitStats(ru *cstructs.TaskResourceUsage) { - if !r.config.PublishAllocationMetrics { - return - } - - // If the task is not running don't emit anything - r.runningLock.Lock() - running := r.running - r.runningLock.Unlock() - if !running { - return - } - - if ru.ResourceUsage.MemoryStats != nil { - r.setGaugeForMemory(ru) - } - - if ru.ResourceUsage.CpuStats != nil { - r.setGaugeForCPU(ru) - } -} diff --git a/client/allocrunnerdeprecated/taskrunner/task_runner_test.go b/client/allocrunnerdeprecated/taskrunner/task_runner_test.go deleted file mode 100644 index 3f39329df30d..000000000000 --- a/client/allocrunnerdeprecated/taskrunner/task_runner_test.go +++ /dev/null @@ -1,2035 +0,0 @@ -// +build deprecated - -package taskrunner - -import ( - "fmt" - "io/ioutil" - "net/http" - "net/http/httptest" - "os" - "path/filepath" - "reflect" - "strings" - "syscall" - "testing" - "time" - - "github.com/boltdb/bolt" - "github.com/golang/snappy" - "github.com/hashicorp/nomad/client/allocdir" - "github.com/hashicorp/nomad/client/allocrunner/taskrunner/restarts" - "github.com/hashicorp/nomad/client/config" - consulApi "github.com/hashicorp/nomad/client/consul" - cstructs "github.com/hashicorp/nomad/client/structs" - "github.com/hashicorp/nomad/client/taskenv" - "github.com/hashicorp/nomad/client/vaultclient" - "github.com/hashicorp/nomad/command/agent/consul" - "github.com/hashicorp/nomad/helper/testlog" - "github.com/hashicorp/nomad/nomad/mock" - "github.com/hashicorp/nomad/nomad/structs" - "github.com/hashicorp/nomad/testutil" - "github.com/kr/pretty" -) - -// Returns a tracker that never restarts. -func noRestartsTracker() *restarts.RestartTracker { - policy := &structs.RestartPolicy{Attempts: 0, Mode: structs.RestartPolicyModeFail} - return restarts.NewRestartTracker(policy, structs.JobTypeBatch) -} - -type MockTaskStateUpdater struct { - state string - failed bool - events []*structs.TaskEvent -} - -func (m *MockTaskStateUpdater) Update(name, state string, event *structs.TaskEvent, _ bool) { - if state != "" { - m.state = state - } - if event != nil { - if event.FailsTask { - m.failed = true - } - m.events = append(m.events, event) - } -} - -// String for debugging purposes. -func (m *MockTaskStateUpdater) String() string { - s := fmt.Sprintf("Updates:\n state=%q\n failed=%t\n events=\n", m.state, m.failed) - for _, e := range m.events { - s += fmt.Sprintf(" %#v\n", e) - } - return s -} - -type taskRunnerTestCtx struct { - upd *MockTaskStateUpdater - tr *TaskRunner - allocDir *allocdir.AllocDir - vault *vaultclient.MockVaultClient - consul *consul.MockAgent - consulClient *consul.ServiceClient -} - -// Cleanup calls Destroy on the task runner and alloc dir -func (ctx *taskRunnerTestCtx) Cleanup() { - ctx.consulClient.Shutdown() - ctx.tr.Destroy(structs.NewTaskEvent(structs.TaskKilled)) - ctx.allocDir.Destroy() -} - -func testTaskRunner(t *testing.T, restarts bool) *taskRunnerTestCtx { - // Use mock driver - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "500ms", - } - return testTaskRunnerFromAlloc(t, restarts, alloc) -} - -// Creates a mock task runner using the first task in the first task group of -// the passed allocation. -// -// Callers should defer Cleanup() to cleanup after completion -func testTaskRunnerFromAlloc(t *testing.T, restarts bool, alloc *structs.Allocation) *taskRunnerTestCtx { - logger := testlog.Logger(t) - conf := config.DefaultConfig() - conf.Node = mock.Node() - conf.StateDir = os.TempDir() - conf.AllocDir = os.TempDir() - - tmp, err := ioutil.TempFile("", "state-db") - if err != nil { - t.Fatalf("error creating state db file: %v", err) - } - db, err := bolt.Open(tmp.Name(), 0600, nil) - if err != nil { - t.Fatalf("error creating state db: %v", err) - } - - upd := &MockTaskStateUpdater{} - task := alloc.Job.TaskGroups[0].Tasks[0] - - allocDir := allocdir.NewAllocDir(testlog.Logger(t), filepath.Join(conf.AllocDir, alloc.ID)) - if err := allocDir.Build(); err != nil { - t.Fatalf("error building alloc dir: %v", err) - return nil - } - - //HACK to get FSIsolation and chroot without using AllocRunner, - // TaskRunner, or Drivers - fsi := cstructs.FSIsolationImage - switch task.Driver { - case "raw_exec": - fsi = cstructs.FSIsolationNone - case "exec", "java": - fsi = cstructs.FSIsolationChroot - } - taskDir := allocDir.NewTaskDir(task.Name) - if err := taskDir.Build(false, config.DefaultChrootEnv, fsi); err != nil { - t.Fatalf("error building task dir %q: %v", task.Name, err) - return nil - } - - vclient := vaultclient.NewMockVaultClient() - cclient := consul.NewMockAgent() - serviceClient := consul.NewServiceClient(cclient, testlog.HCLogger(t), true) - go serviceClient.Run() - tr := NewTaskRunner(logger, conf, db, upd.Update, taskDir, alloc, task, vclient, serviceClient) - if !restarts { - tr.restartTracker = noRestartsTracker() - } - return &taskRunnerTestCtx{ - upd: upd, - tr: tr, - allocDir: allocDir, - vault: vclient, - consul: cclient, - consulClient: serviceClient, - } -} - -// testWaitForTaskToStart waits for the task to or fails the test -func testWaitForTaskToStart(t *testing.T, ctx *taskRunnerTestCtx) { - // Wait for the task to start - testutil.WaitForResult(func() (bool, error) { - l := len(ctx.upd.events) - if l < 2 { - return false, fmt.Errorf("Expect two events; got %v", l) - } - - if ctx.upd.events[0].Type != structs.TaskReceived { - return false, fmt.Errorf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - if l >= 3 { - if ctx.upd.events[1].Type != structs.TaskSetup { - return false, fmt.Errorf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskSetup) - } - if ctx.upd.events[2].Type != structs.TaskStarted { - return false, fmt.Errorf("Third Event was %v; want %v", ctx.upd.events[2].Type, structs.TaskStarted) - } - } else { - if ctx.upd.events[1].Type != structs.TaskStarted { - return false, fmt.Errorf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskStarted) - } - } - - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -func TestTaskRunner_SimpleRun(t *testing.T) { - t.Parallel() - ctx := testTaskRunner(t, false) - ctx.tr.MarkReceived() - go ctx.tr.Run() - defer ctx.Cleanup() - - select { - case <-ctx.tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - t.Fatalf("timeout") - } - - if len(ctx.upd.events) != 4 { - t.Fatalf("should have 3 ctx.updates: %#v", ctx.upd.events) - } - - if ctx.upd.state != structs.TaskStateDead { - t.Fatalf("TaskState %v; want %v", ctx.upd.state, structs.TaskStateDead) - } - - event := ctx.upd.events[0] - - if event.Type != structs.TaskReceived { - t.Fatalf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - event = ctx.upd.events[1] - if event.Type != structs.TaskSetup { - t.Fatalf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskSetup) - } - displayMsg := event.DisplayMessage - - if displayMsg != "Building Task Directory" { - t.Fatalf("Bad display message:%v", displayMsg) - } - - event = ctx.upd.events[2] - if event.Type != structs.TaskStarted { - t.Fatalf("Second Event was %v; want %v", ctx.upd.events[2].Type, structs.TaskStarted) - } - displayMsg = event.DisplayMessage - if displayMsg != "Task started by client" { - t.Fatalf("Bad display message:%v", displayMsg) - } - - event = ctx.upd.events[3] - if event.Type != structs.TaskTerminated { - t.Fatalf("Third Event was %v; want %v", event.Type, structs.TaskTerminated) - } - displayMsg = event.DisplayMessage - if displayMsg != "Exit Code: 0" { - t.Fatalf("Bad display message:%v", displayMsg) - } - if event.Details["exit_code"] != "0" { - t.Fatalf("Bad details map :%v", event.Details) - } - -} - -func TestTaskRunner_Run_RecoverableStartError(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": 0, - "start_error": "driver failure", - "start_error_recoverable": true, - } - - ctx := testTaskRunnerFromAlloc(t, true, alloc) - ctx.tr.MarkReceived() - go ctx.tr.Run() - defer ctx.Cleanup() - - testutil.WaitForResult(func() (bool, error) { - if l := len(ctx.upd.events); l < 4 { - return false, fmt.Errorf("Expect at least four events; got %v", l) - } - - if ctx.upd.events[0].Type != structs.TaskReceived { - return false, fmt.Errorf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - if ctx.upd.events[1].Type != structs.TaskSetup { - return false, fmt.Errorf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskSetup) - } - - if ctx.upd.events[2].Type != structs.TaskDriverFailure { - return false, fmt.Errorf("Third Event was %v; want %v", ctx.upd.events[2].Type, structs.TaskDriverFailure) - } - - if ctx.upd.events[3].Type != structs.TaskRestarting { - return false, fmt.Errorf("Fourth Event was %v; want %v", ctx.upd.events[3].Type, structs.TaskRestarting) - } - - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -func TestTaskRunner_Destroy(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "1000s", - } - - ctx := testTaskRunnerFromAlloc(t, true, alloc) - ctx.tr.MarkReceived() - go ctx.tr.Run() - defer ctx.Cleanup() - - // Wait for the task to start - testWaitForTaskToStart(t, ctx) - - // Begin the tear down - ctx.tr.Destroy(structs.NewTaskEvent(structs.TaskKilled)) - - select { - case <-ctx.tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - t.Fatalf("timeout") - } - - if len(ctx.upd.events) != 5 { - t.Fatalf("should have 5 ctx.updates: %#v", ctx.upd.events) - } - - if ctx.upd.state != structs.TaskStateDead { - t.Fatalf("TaskState %v; want %v", ctx.upd.state, structs.TaskStateDead) - } - - if ctx.upd.events[3].Type != structs.TaskKilling { - t.Fatalf("Third Event was %v; want %v", ctx.upd.events[3].Type, structs.TaskKilling) - } - - if ctx.upd.events[4].Type != structs.TaskKilled { - t.Fatalf("Third Event was %v; want %v", ctx.upd.events[4].Type, structs.TaskKilled) - } -} - -func TestTaskRunner_Update(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Services[0].Checks[0] = &structs.ServiceCheck{ - Name: "http-check", - Type: "http", - PortLabel: "http", - Path: "${NOMAD_META_foo}", - } - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "100s", - } - - ctx := testTaskRunnerFromAlloc(t, true, alloc) - ctx.tr.MarkReceived() - go ctx.tr.Run() - defer ctx.Cleanup() - - testWaitForTaskToStart(t, ctx) - - // Update the task definition - updateAlloc := ctx.tr.alloc.Copy() - - // Update the restart policy - newTG := updateAlloc.Job.TaskGroups[0] - newMode := "foo" - newTG.RestartPolicy.Mode = newMode - - newTask := newTG.Tasks[0] - newTask.Driver = "mock_driver" - - // Update meta to make sure service checks are interpolated correctly - // #2180 - newTask.Meta["foo"] = "/UPDATE" - - // Update the kill timeout - oldHandle := ctx.tr.handle.ID() - newTask.KillTimeout = time.Hour - ctx.tr.Update(updateAlloc) - - // Wait for ctx.update to take place - testutil.WaitForResult(func() (bool, error) { - if ctx.tr.task == newTask { - return false, fmt.Errorf("We copied the pointer! This would be very bad") - } - if ctx.tr.task.Driver != newTask.Driver { - return false, fmt.Errorf("Task not copied") - } - if ctx.tr.restartTracker.GetPolicy().Mode != newMode { - return false, fmt.Errorf("expected restart policy %q but found %q", newMode, ctx.tr.restartTracker.GetPolicy().Mode) - } - if ctx.tr.handle.ID() == oldHandle { - return false, fmt.Errorf("handle not ctx.updated") - } - - // Make sure Consul services were interpolated correctly during - // the update #2180 - checks := ctx.consul.CheckRegs() - if n := len(checks); n != 1 { - return false, fmt.Errorf("expected 1 check but found %d", n) - } - for _, check := range checks { - if found := check.HTTP; !strings.HasSuffix(found, "/UPDATE") { - return false, fmt.Errorf("expected consul check path to end with /UPDATE but found: %q", found) - } - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -func TestTaskRunner_SaveRestoreState(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "5s", - } - - // Give it a Vault token - task.Vault = &structs.Vault{Policies: []string{"default"}} - - ctx := testTaskRunnerFromAlloc(t, false, alloc) - ctx.tr.MarkReceived() - go ctx.tr.Run() - defer ctx.Cleanup() - - // Wait for the task to be running and then snapshot the state - testWaitForTaskToStart(t, ctx) - - if err := ctx.tr.SaveState(); err != nil { - t.Fatalf("err: %v", err) - } - - // Read the token from the file system - tokenPath := filepath.Join(ctx.tr.taskDir.SecretsDir, vaultTokenFile) - data, err := ioutil.ReadFile(tokenPath) - if err != nil { - t.Fatalf("Failed to read file: %v", err) - } - token := string(data) - if len(token) == 0 { - t.Fatalf("Token not written to disk") - } - - // Create a new task runner - task2 := &structs.Task{Name: ctx.tr.task.Name, Driver: ctx.tr.task.Driver, Vault: ctx.tr.task.Vault} - tr2 := NewTaskRunner(ctx.tr.logger, ctx.tr.config, ctx.tr.stateDB, ctx.upd.Update, - ctx.tr.taskDir, ctx.tr.alloc, task2, ctx.tr.vaultClient, ctx.tr.consul) - tr2.restartTracker = noRestartsTracker() - if _, err := tr2.RestoreState(); err != nil { - t.Fatalf("err: %v", err) - } - go tr2.Run() - defer tr2.Destroy(structs.NewTaskEvent(structs.TaskKilled)) - - // Destroy and wait - select { - case <-tr2.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - t.Fatalf("timeout") - } - - // Check that we recovered the token - if act := tr2.vaultFuture.Get(); act != token { - t.Fatalf("Vault token not properly recovered") - } -} - -func TestTaskRunner_Download_List(t *testing.T) { - t.Parallel() - ts := httptest.NewServer(http.FileServer(http.Dir(filepath.Dir(".")))) - defer ts.Close() - - // Create an allocation that has a task with a list of artifacts. - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "10s", - } - f1 := "task_runner_test.go" - f2 := "task_runner.go" - artifact1 := structs.TaskArtifact{ - GetterSource: fmt.Sprintf("%s/%s", ts.URL, f1), - } - artifact2 := structs.TaskArtifact{ - GetterSource: fmt.Sprintf("%s/%s", ts.URL, f2), - } - task.Artifacts = []*structs.TaskArtifact{&artifact1, &artifact2} - - ctx := testTaskRunnerFromAlloc(t, false, alloc) - ctx.tr.MarkReceived() - go ctx.tr.Run() - defer ctx.Cleanup() - - select { - case <-ctx.tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - t.Fatalf("timeout") - } - - if len(ctx.upd.events) != 5 { - t.Fatalf("should have 5 ctx.updates: %#v", ctx.upd.events) - } - - if ctx.upd.state != structs.TaskStateDead { - t.Fatalf("TaskState %v; want %v", ctx.upd.state, structs.TaskStateDead) - } - - if ctx.upd.events[0].Type != structs.TaskReceived { - t.Fatalf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - if ctx.upd.events[1].Type != structs.TaskSetup { - t.Fatalf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskSetup) - } - - if ctx.upd.events[2].Type != structs.TaskDownloadingArtifacts { - t.Fatalf("Third Event was %v; want %v", ctx.upd.events[2].Type, structs.TaskDownloadingArtifacts) - } - - if ctx.upd.events[3].Type != structs.TaskStarted { - t.Fatalf("Forth Event was %v; want %v", ctx.upd.events[3].Type, structs.TaskStarted) - } - - if ctx.upd.events[4].Type != structs.TaskTerminated { - t.Fatalf("Fifth Event was %v; want %v", ctx.upd.events[4].Type, structs.TaskTerminated) - } - - // Check that both files exist. - if _, err := os.Stat(filepath.Join(ctx.tr.taskDir.Dir, f1)); err != nil { - t.Fatalf("%v not downloaded", f1) - } - if _, err := os.Stat(filepath.Join(ctx.tr.taskDir.Dir, f2)); err != nil { - t.Fatalf("%v not downloaded", f2) - } -} - -func TestTaskRunner_Download_Retries(t *testing.T) { - t.Parallel() - // Create an allocation that has a task with bad artifacts. - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "10s", - } - artifact := structs.TaskArtifact{ - GetterSource: "http://127.0.0.1:0/foo/bar/baz", - } - task.Artifacts = []*structs.TaskArtifact{&artifact} - - // Make the restart policy try one ctx.update - alloc.Job.TaskGroups[0].RestartPolicy = &structs.RestartPolicy{ - Attempts: 1, - Interval: 10 * time.Minute, - Delay: 1 * time.Second, - Mode: structs.RestartPolicyModeFail, - } - - ctx := testTaskRunnerFromAlloc(t, true, alloc) - ctx.tr.MarkReceived() - go ctx.tr.Run() - defer ctx.Cleanup() - - select { - case <-ctx.tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - t.Fatalf("timeout") - } - - if len(ctx.upd.events) != 8 { - t.Fatalf("should have 8 ctx.updates: %#v", ctx.upd.events) - } - - if ctx.upd.state != structs.TaskStateDead { - t.Fatalf("TaskState %v; want %v", ctx.upd.state, structs.TaskStateDead) - } - - if ctx.upd.events[0].Type != structs.TaskReceived { - t.Fatalf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - if ctx.upd.events[1].Type != structs.TaskSetup { - t.Fatalf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskSetup) - } - - if ctx.upd.events[2].Type != structs.TaskDownloadingArtifacts { - t.Fatalf("Third Event was %v; want %v", ctx.upd.events[2].Type, structs.TaskDownloadingArtifacts) - } - - if ctx.upd.events[3].Type != structs.TaskArtifactDownloadFailed { - t.Fatalf("Fourth Event was %v; want %v", ctx.upd.events[3].Type, structs.TaskArtifactDownloadFailed) - } - - if ctx.upd.events[4].Type != structs.TaskRestarting { - t.Fatalf("Fifth Event was %v; want %v", ctx.upd.events[4].Type, structs.TaskRestarting) - } - - if ctx.upd.events[5].Type != structs.TaskDownloadingArtifacts { - t.Fatalf("Sixth Event was %v; want %v", ctx.upd.events[5].Type, structs.TaskDownloadingArtifacts) - } - - if ctx.upd.events[6].Type != structs.TaskArtifactDownloadFailed { - t.Fatalf("Seventh Event was %v; want %v", ctx.upd.events[6].Type, structs.TaskArtifactDownloadFailed) - } - - if ctx.upd.events[7].Type != structs.TaskNotRestarting { - t.Fatalf("Eighth Event was %v; want %v", ctx.upd.events[7].Type, structs.TaskNotRestarting) - } -} - -// TestTaskRunner_UnregisterConsul_Retries asserts a task is unregistered from -// Consul when waiting to be retried. -func TestTaskRunner_UnregisterConsul_Retries(t *testing.T) { - t.Parallel() - // Create an allocation that has a task with bad artifacts. - alloc := mock.Alloc() - - // Make the restart policy try one ctx.update - alloc.Job.TaskGroups[0].RestartPolicy = &structs.RestartPolicy{ - Attempts: 1, - Interval: 10 * time.Minute, - Delay: time.Nanosecond, - Mode: structs.RestartPolicyModeFail, - } - - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "1", - "run_for": "1ns", - } - - ctx := testTaskRunnerFromAlloc(t, true, alloc) - - // Use mockConsulServiceClient - consul := consulApi.NewMockConsulServiceClient(t, testlog.HCLogger(t)) - ctx.tr.consul = consul - - ctx.tr.MarkReceived() - ctx.tr.Run() - defer ctx.Cleanup() - - // Assert it is properly registered and unregistered - if expected := 6; len(consul.Ops) != expected { - t.Errorf("expected %d consul ops but found: %d", expected, len(consul.Ops)) - } - if consul.Ops[0].Op != "add" { - t.Errorf("expected first Op to be add but found: %q", consul.Ops[0].Op) - } - if consul.Ops[1].Op != "remove" { - t.Errorf("expected second op to be remove but found: %q", consul.Ops[1].Op) - } - if consul.Ops[2].Op != "remove" { - t.Errorf("expected third op to be remove but found: %q", consul.Ops[2].Op) - } - if consul.Ops[3].Op != "add" { - t.Errorf("expected fourth op to be add but found: %q", consul.Ops[3].Op) - } - if consul.Ops[4].Op != "remove" { - t.Errorf("expected fifth op to be remove but found: %q", consul.Ops[4].Op) - } - if consul.Ops[5].Op != "remove" { - t.Errorf("expected sixth op to be remove but found: %q", consul.Ops[5].Op) - } -} - -//XXX Ported to allocrunner/task_runner/validate_hook_test.go -func TestTaskRunner_Validate_UserEnforcement(t *testing.T) { - t.Parallel() - ctx := testTaskRunner(t, false) - defer ctx.Cleanup() - - // Try to run as root with exec. - ctx.tr.task.Driver = "exec" - ctx.tr.task.User = "root" - if err := ctx.tr.validateTask(); err == nil { - t.Fatalf("expected error running as root with exec") - } - - // Try to run a non-blacklisted user with exec. - ctx.tr.task.Driver = "exec" - ctx.tr.task.User = "foobar" - if err := ctx.tr.validateTask(); err != nil { - t.Fatalf("unexpected error: %v", err) - } - - // Try to run as root with docker. - ctx.tr.task.Driver = "docker" - ctx.tr.task.User = "root" - if err := ctx.tr.validateTask(); err != nil { - t.Fatalf("unexpected error: %v", err) - } -} - -func TestTaskRunner_RestartTask(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "100s", - } - - ctx := testTaskRunnerFromAlloc(t, true, alloc) - ctx.tr.MarkReceived() - go ctx.tr.Run() - defer ctx.Cleanup() - - // Wait for it to start - go func() { - testWaitForTaskToStart(t, ctx) - ctx.tr.Restart("test", "restart", false) - - // Wait for it to restart then kill - go func() { - // Wait for the task to start again - testutil.WaitForResult(func() (bool, error) { - if len(ctx.upd.events) != 8 { - return false, fmt.Errorf("task %q in alloc %q should have 8 ctx.updates: %#v", task.Name, alloc.ID, ctx.upd.events) - } - - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - ctx.tr.Kill("test", "restart", false) - }() - }() - - select { - case <-ctx.tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - t.Fatalf("timeout") - } - - if len(ctx.upd.events) != 10 { - t.Fatalf("should have 10 ctx.updates: %#v", ctx.upd.events) - } - - if ctx.upd.state != structs.TaskStateDead { - t.Fatalf("TaskState %v; want %v", ctx.upd.state, structs.TaskStateDead) - } - - if ctx.upd.events[0].Type != structs.TaskReceived { - t.Fatalf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - if ctx.upd.events[1].Type != structs.TaskSetup { - t.Fatalf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskSetup) - } - - if ctx.upd.events[2].Type != structs.TaskStarted { - t.Fatalf("Third Event was %v; want %v", ctx.upd.events[2].Type, structs.TaskStarted) - } - - if ctx.upd.events[3].Type != structs.TaskRestartSignal { - t.Fatalf("Fourth Event was %v; want %v", ctx.upd.events[3].Type, structs.TaskRestartSignal) - } - - if ctx.upd.events[4].Type != structs.TaskKilling { - t.Fatalf("Fifth Event was %v; want %v", ctx.upd.events[4].Type, structs.TaskKilling) - } - - if ctx.upd.events[5].Type != structs.TaskKilled { - t.Fatalf("Sixth Event was %v; want %v", ctx.upd.events[5].Type, structs.TaskKilled) - } - - if ctx.upd.events[6].Type != structs.TaskRestarting { - t.Fatalf("Seventh Event was %v; want %v", ctx.upd.events[6].Type, structs.TaskRestarting) - } - - if ctx.upd.events[7].Type != structs.TaskStarted { - t.Fatalf("Eighth Event was %v; want %v", ctx.upd.events[8].Type, structs.TaskStarted) - } - if ctx.upd.events[8].Type != structs.TaskKilling { - t.Fatalf("Ninth Event was %v; want %v", ctx.upd.events[8].Type, structs.TaskKilling) - } - - if ctx.upd.events[9].Type != structs.TaskKilled { - t.Fatalf("Tenth Event was %v; want %v", ctx.upd.events[9].Type, structs.TaskKilled) - } -} - -func TestTaskRunner_KillTask(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "10s", - } - - ctx := testTaskRunnerFromAlloc(t, false, alloc) - ctx.tr.MarkReceived() - go ctx.tr.Run() - defer ctx.Cleanup() - - go func() { - testWaitForTaskToStart(t, ctx) - ctx.tr.Kill("test", "kill", true) - }() - - select { - case <-ctx.tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - t.Fatalf("timeout") - } - - if len(ctx.upd.events) != 5 { - t.Fatalf("should have 4 ctx.updates: %#v", ctx.upd.events) - } - - if ctx.upd.state != structs.TaskStateDead { - t.Fatalf("TaskState %v; want %v", ctx.upd.state, structs.TaskStateDead) - } - - if !ctx.upd.failed { - t.Fatalf("TaskState should be failed: %+v", ctx.upd) - } - - if ctx.upd.events[0].Type != structs.TaskReceived { - t.Fatalf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - if ctx.upd.events[1].Type != structs.TaskSetup { - t.Fatalf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskSetup) - } - - if ctx.upd.events[2].Type != structs.TaskStarted { - t.Fatalf("Third Event was %v; want %v", ctx.upd.events[2].Type, structs.TaskStarted) - } - - if ctx.upd.events[3].Type != structs.TaskKilling { - t.Fatalf("Fourth Event was %v; want %v", ctx.upd.events[3].Type, structs.TaskKilling) - } - - if ctx.upd.events[4].Type != structs.TaskKilled { - t.Fatalf("Fifth Event was %v; want %v", ctx.upd.events[4].Type, structs.TaskKilled) - } -} - -func TestTaskRunner_SignalFailure(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "10s", - "signal_error": "test forcing failure", - } - - ctx := testTaskRunnerFromAlloc(t, false, alloc) - ctx.tr.MarkReceived() - go ctx.tr.Run() - defer ctx.Cleanup() - - // Wait for the task to start - testWaitForTaskToStart(t, ctx) - - if err := ctx.tr.Signal("test", "test", syscall.SIGINT); err == nil { - t.Fatalf("Didn't receive error") - } -} - -func TestTaskRunner_BlockForVault(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "1s", - } - task.Vault = &structs.Vault{Policies: []string{"default"}} - - ctx := testTaskRunnerFromAlloc(t, false, alloc) - ctx.tr.MarkReceived() - defer ctx.Cleanup() - - // Control when we get a Vault token - token := "1234" - waitCh := make(chan struct{}) - handler := func(*structs.Allocation, []string) (map[string]string, error) { - <-waitCh - return map[string]string{task.Name: token}, nil - } - ctx.tr.vaultClient.(*vaultclient.MockVaultClient).DeriveTokenFn = handler - - go ctx.tr.Run() - - select { - case <-ctx.tr.WaitCh(): - t.Fatalf("premature exit") - case <-time.After(1 * time.Second): - } - - if len(ctx.upd.events) != 2 { - t.Fatalf("should have 2 ctx.updates: %#v", ctx.upd.events) - } - - if ctx.upd.state != structs.TaskStatePending { - t.Fatalf("TaskState %v; want %v", ctx.upd.state, structs.TaskStatePending) - } - - if ctx.upd.events[0].Type != structs.TaskReceived { - t.Fatalf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - if ctx.upd.events[1].Type != structs.TaskSetup { - t.Fatalf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskSetup) - } - - // Unblock - close(waitCh) - - select { - case <-ctx.tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - t.Fatalf("timeout") - } - - if len(ctx.upd.events) != 4 { - t.Fatalf("should have 4 ctx.updates: %#v", ctx.upd.events) - } - - if ctx.upd.state != structs.TaskStateDead { - t.Fatalf("TaskState %v; want %v", ctx.upd.state, structs.TaskStateDead) - } - - if ctx.upd.events[0].Type != structs.TaskReceived { - t.Fatalf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - if ctx.upd.events[1].Type != structs.TaskSetup { - t.Fatalf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskSetup) - } - - if ctx.upd.events[2].Type != structs.TaskStarted { - t.Fatalf("Third Event was %v; want %v", ctx.upd.events[2].Type, structs.TaskStarted) - } - - if ctx.upd.events[3].Type != structs.TaskTerminated { - t.Fatalf("Fourth Event was %v; want %v", ctx.upd.events[3].Type, structs.TaskTerminated) - } - - // Check that the token is on disk - tokenPath := filepath.Join(ctx.tr.taskDir.SecretsDir, vaultTokenFile) - data, err := ioutil.ReadFile(tokenPath) - if err != nil { - t.Fatalf("Failed to read file: %v", err) - } - - if act := string(data); act != token { - t.Fatalf("Token didn't get written to disk properly, got %q; want %q", act, token) - } - - // Check the token was revoked - m := ctx.tr.vaultClient.(*vaultclient.MockVaultClient) - testutil.WaitForResult(func() (bool, error) { - if len(m.StoppedTokens) != 1 { - return false, fmt.Errorf("Expected a stopped token: %v", m.StoppedTokens) - } - - if a := m.StoppedTokens[0]; a != token { - return false, fmt.Errorf("got stopped token %q; want %q", a, token) - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -func TestTaskRunner_DeriveToken_Retry(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "1s", - } - task.Vault = &structs.Vault{Policies: []string{"default"}} - - ctx := testTaskRunnerFromAlloc(t, false, alloc) - ctx.tr.MarkReceived() - defer ctx.Cleanup() - - // Control when we get a Vault token - token := "1234" - count := 0 - handler := func(*structs.Allocation, []string) (map[string]string, error) { - if count > 0 { - return map[string]string{task.Name: token}, nil - } - - count++ - return nil, structs.NewRecoverableError(fmt.Errorf("Want a retry"), true) - } - ctx.tr.vaultClient.(*vaultclient.MockVaultClient).DeriveTokenFn = handler - go ctx.tr.Run() - - select { - case <-ctx.tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - t.Fatalf("timeout") - } - - if len(ctx.upd.events) != 4 { - t.Fatalf("should have 4 ctx.updates: %#v", ctx.upd.events) - } - - if ctx.upd.state != structs.TaskStateDead { - t.Fatalf("TaskState %v; want %v", ctx.upd.state, structs.TaskStateDead) - } - - if ctx.upd.events[0].Type != structs.TaskReceived { - t.Fatalf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - if ctx.upd.events[1].Type != structs.TaskSetup { - t.Fatalf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskSetup) - } - - if ctx.upd.events[2].Type != structs.TaskStarted { - t.Fatalf("Third Event was %v; want %v", ctx.upd.events[2].Type, structs.TaskStarted) - } - - if ctx.upd.events[3].Type != structs.TaskTerminated { - t.Fatalf("Fourth Event was %v; want %v", ctx.upd.events[3].Type, structs.TaskTerminated) - } - - // Check that the token is on disk - tokenPath := filepath.Join(ctx.tr.taskDir.SecretsDir, vaultTokenFile) - data, err := ioutil.ReadFile(tokenPath) - if err != nil { - t.Fatalf("Failed to read file: %v", err) - } - - if act := string(data); act != token { - t.Fatalf("Token didn't get written to disk properly, got %q; want %q", act, token) - } - - // Check the token was revoked - m := ctx.tr.vaultClient.(*vaultclient.MockVaultClient) - testutil.WaitForResult(func() (bool, error) { - if len(m.StoppedTokens) != 1 { - return false, fmt.Errorf("Expected a stopped token: %v", m.StoppedTokens) - } - - if a := m.StoppedTokens[0]; a != token { - return false, fmt.Errorf("got stopped token %q; want %q", a, token) - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -func TestTaskRunner_DeriveToken_Unrecoverable(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "10s", - } - task.Vault = &structs.Vault{ - Policies: []string{"default"}, - ChangeMode: structs.VaultChangeModeRestart, - } - - ctx := testTaskRunnerFromAlloc(t, false, alloc) - ctx.tr.MarkReceived() - defer ctx.Cleanup() - - // Error the token derivation - vc := ctx.tr.vaultClient.(*vaultclient.MockVaultClient) - vc.SetDeriveTokenError(alloc.ID, []string{task.Name}, fmt.Errorf("Non recoverable")) - go ctx.tr.Run() - - // Wait for the task to start - testutil.WaitForResult(func() (bool, error) { - if l := len(ctx.upd.events); l != 3 { - return false, fmt.Errorf("Expect 3 events; got %v", l) - } - - if ctx.upd.events[0].Type != structs.TaskReceived { - return false, fmt.Errorf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - if ctx.upd.events[1].Type != structs.TaskSetup { - return false, fmt.Errorf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskSetup) - } - - if ctx.upd.events[2].Type != structs.TaskKilling { - return false, fmt.Errorf("Third Event was %v; want %v", ctx.upd.events[2].Type, structs.TaskKilling) - } - - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -func TestTaskRunner_Template_Block(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "1s", - } - task.Templates = []*structs.Template{ - { - EmbeddedTmpl: "{{key \"foo\"}}", - DestPath: "local/test", - ChangeMode: structs.TemplateChangeModeNoop, - }, - } - - ctx := testTaskRunnerFromAlloc(t, false, alloc) - ctx.tr.MarkReceived() - go ctx.tr.Run() - defer ctx.Cleanup() - - select { - case <-ctx.tr.WaitCh(): - t.Fatalf("premature exit") - case <-time.After(1 * time.Second): - } - - if len(ctx.upd.events) != 2 { - t.Fatalf("should have 2 ctx.updates: %#v", ctx.upd.events) - } - - if ctx.upd.state != structs.TaskStatePending { - t.Fatalf("TaskState %v; want %v", ctx.upd.state, structs.TaskStatePending) - } - - if ctx.upd.events[0].Type != structs.TaskReceived { - t.Fatalf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - if ctx.upd.events[1].Type != structs.TaskSetup { - t.Fatalf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskSetup) - } - - // Unblock - ctx.tr.UnblockStart("test") - - select { - case <-ctx.tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - t.Fatalf("timeout") - } - - if len(ctx.upd.events) != 4 { - t.Fatalf("should have 4 ctx.updates: %#v", ctx.upd.events) - } - - if ctx.upd.state != structs.TaskStateDead { - t.Fatalf("TaskState %v; want %v", ctx.upd.state, structs.TaskStateDead) - } - - if ctx.upd.events[0].Type != structs.TaskReceived { - t.Fatalf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - if ctx.upd.events[1].Type != structs.TaskSetup { - t.Fatalf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskSetup) - } - - if ctx.upd.events[2].Type != structs.TaskStarted { - t.Fatalf("Third Event was %v; want %v", ctx.upd.events[2].Type, structs.TaskStarted) - } - - if ctx.upd.events[3].Type != structs.TaskTerminated { - t.Fatalf("Fourth Event was %v; want %v", ctx.upd.events[3].Type, structs.TaskTerminated) - } -} - -func TestTaskRunner_Template_Artifact(t *testing.T) { - t.Parallel() - dir, err := os.Getwd() - if err != nil { - t.Fatalf("bad: %v", err) - } - - ts := httptest.NewServer(http.FileServer(http.Dir(filepath.Join(dir, "../../..")))) - defer ts.Close() - - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "1s", - } - // Create an allocation that has a task that renders a template from an - // artifact - f1 := "CHANGELOG.md" - artifact := structs.TaskArtifact{ - GetterSource: fmt.Sprintf("%s/%s", ts.URL, f1), - } - task.Artifacts = []*structs.TaskArtifact{&artifact} - task.Templates = []*structs.Template{ - { - SourcePath: "CHANGELOG.md", - DestPath: "local/test", - ChangeMode: structs.TemplateChangeModeNoop, - }, - } - - ctx := testTaskRunnerFromAlloc(t, false, alloc) - ctx.tr.MarkReceived() - defer ctx.Cleanup() - go ctx.tr.Run() - - select { - case <-ctx.tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - t.Fatalf("timeout") - } - - if len(ctx.upd.events) != 5 { - t.Fatalf("should have 5 ctx.updates: %#v", ctx.upd.events) - } - - if ctx.upd.state != structs.TaskStateDead { - t.Fatalf("TaskState %v; want %v", ctx.upd.state, structs.TaskStateDead) - } - - if ctx.upd.events[0].Type != structs.TaskReceived { - t.Fatalf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - if ctx.upd.events[1].Type != structs.TaskSetup { - t.Fatalf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskSetup) - } - - if ctx.upd.events[2].Type != structs.TaskDownloadingArtifacts { - t.Fatalf("Third Event was %v; want %v", ctx.upd.events[2].Type, structs.TaskDownloadingArtifacts) - } - - if ctx.upd.events[3].Type != structs.TaskStarted { - t.Fatalf("Fourth Event was %v; want %v", ctx.upd.events[3].Type, structs.TaskStarted) - } - - if ctx.upd.events[4].Type != structs.TaskTerminated { - t.Fatalf("Fifth Event was %v; want %v", ctx.upd.events[4].Type, structs.TaskTerminated) - } - - // Check that both files exist. - if _, err := os.Stat(filepath.Join(ctx.tr.taskDir.Dir, f1)); err != nil { - t.Fatalf("%v not downloaded", f1) - } - if _, err := os.Stat(filepath.Join(ctx.tr.taskDir.LocalDir, "test")); err != nil { - t.Fatalf("template not rendered") - } -} - -func TestTaskRunner_Template_NewVaultToken(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "1s", - } - task.Templates = []*structs.Template{ - { - EmbeddedTmpl: "{{key \"foo\"}}", - DestPath: "local/test", - ChangeMode: structs.TemplateChangeModeNoop, - }, - } - task.Vault = &structs.Vault{Policies: []string{"default"}} - - ctx := testTaskRunnerFromAlloc(t, false, alloc) - ctx.tr.MarkReceived() - defer ctx.Cleanup() - go ctx.tr.Run() - - // Wait for a Vault token - var token string - testutil.WaitForResult(func() (bool, error) { - if token = ctx.tr.vaultFuture.Get(); token == "" { - return false, fmt.Errorf("No Vault token") - } - - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - - // Error the token renewal - renewalCh, ok := ctx.vault.RenewTokens[token] - if !ok { - t.Fatalf("no renewal channel") - } - - originalManager := ctx.tr.templateManager - - renewalCh <- fmt.Errorf("Test killing") - close(renewalCh) - - // Wait for a new Vault token - var token2 string - testutil.WaitForResult(func() (bool, error) { - if token2 = ctx.tr.vaultFuture.Get(); token2 == "" || token2 == token { - return false, fmt.Errorf("No new Vault token") - } - - if originalManager == ctx.tr.templateManager { - return false, fmt.Errorf("Template manager not ctx.updated") - } - - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - - // Check the token was revoked - testutil.WaitForResult(func() (bool, error) { - if len(ctx.vault.StoppedTokens) != 1 { - return false, fmt.Errorf("Expected a stopped token: %v", ctx.vault.StoppedTokens) - } - - if a := ctx.vault.StoppedTokens[0]; a != token { - return false, fmt.Errorf("got stopped token %q; want %q", a, token) - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -func TestTaskRunner_VaultManager_Restart(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "10s", - } - task.Vault = &structs.Vault{ - Policies: []string{"default"}, - ChangeMode: structs.VaultChangeModeRestart, - } - - ctx := testTaskRunnerFromAlloc(t, false, alloc) - ctx.tr.MarkReceived() - defer ctx.Cleanup() - go ctx.tr.Run() - - // Wait for the task to start - testWaitForTaskToStart(t, ctx) - - // Error the token renewal - renewalCh, ok := ctx.vault.RenewTokens[ctx.tr.vaultFuture.Get()] - if !ok { - t.Fatalf("no renewal channel") - } - - renewalCh <- fmt.Errorf("Test killing") - close(renewalCh) - - // Ensure a restart - testutil.WaitForResult(func() (bool, error) { - if l := len(ctx.upd.events); l != 8 { - return false, fmt.Errorf("Expect eight events; got %#v", ctx.upd.events) - } - - if ctx.upd.events[0].Type != structs.TaskReceived { - return false, fmt.Errorf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - if ctx.upd.events[1].Type != structs.TaskSetup { - return false, fmt.Errorf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskStarted) - } - - if ctx.upd.events[2].Type != structs.TaskStarted { - return false, fmt.Errorf("Third Event was %v; want %v", ctx.upd.events[2].Type, structs.TaskStarted) - } - - if ctx.upd.events[3].Type != structs.TaskRestartSignal { - return false, fmt.Errorf("Fourth Event was %v; want %v", ctx.upd.events[3].Type, structs.TaskRestartSignal) - } - - if ctx.upd.events[4].Type != structs.TaskKilling { - return false, fmt.Errorf("Fifth Event was %v; want %v", ctx.upd.events[4].Type, structs.TaskKilling) - } - - if ctx.upd.events[5].Type != structs.TaskKilled { - return false, fmt.Errorf("Sixth Event was %v; want %v", ctx.upd.events[5].Type, structs.TaskKilled) - } - - if ctx.upd.events[6].Type != structs.TaskRestarting { - return false, fmt.Errorf("Seventh Event was %v; want %v", ctx.upd.events[6].Type, structs.TaskRestarting) - } - - if ctx.upd.events[7].Type != structs.TaskStarted { - return false, fmt.Errorf("Eight Event was %v; want %v", ctx.upd.events[7].Type, structs.TaskStarted) - } - - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -func TestTaskRunner_VaultManager_Signal(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "10s", - } - task.Vault = &structs.Vault{ - Policies: []string{"default"}, - ChangeMode: structs.VaultChangeModeSignal, - ChangeSignal: "SIGUSR1", - } - - ctx := testTaskRunnerFromAlloc(t, false, alloc) - ctx.tr.MarkReceived() - go ctx.tr.Run() - defer ctx.Cleanup() - - // Wait for the task to start - testWaitForTaskToStart(t, ctx) - - // Error the token renewal - renewalCh, ok := ctx.vault.RenewTokens[ctx.tr.vaultFuture.Get()] - if !ok { - t.Fatalf("no renewal channel") - } - - renewalCh <- fmt.Errorf("Test killing") - close(renewalCh) - - // Ensure a restart - testutil.WaitForResult(func() (bool, error) { - if l := len(ctx.upd.events); l != 4 { - return false, fmt.Errorf("Expect four events; got %#v", ctx.upd.events) - } - - if ctx.upd.events[0].Type != structs.TaskReceived { - return false, fmt.Errorf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - if ctx.upd.events[1].Type != structs.TaskSetup { - return false, fmt.Errorf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskSetup) - } - - if ctx.upd.events[2].Type != structs.TaskStarted { - return false, fmt.Errorf("Third Event was %v; want %v", ctx.upd.events[2].Type, structs.TaskStarted) - } - - if ctx.upd.events[3].Type != structs.TaskSignaling { - return false, fmt.Errorf("Fourth Event was %v; want %v", ctx.upd.events[3].Type, structs.TaskSignaling) - } - - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) -} - -// Test that the payload is written to disk -func TestTaskRunner_SimpleRun_Dispatch(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "1s", - } - fileName := "test" - task.DispatchPayload = &structs.DispatchPayloadConfig{ - File: fileName, - } - alloc.Job.ParameterizedJob = &structs.ParameterizedJobConfig{} - - // Add an encrypted payload - expected := []byte("hello world") - compressed := snappy.Encode(nil, expected) - alloc.Job.Payload = compressed - - ctx := testTaskRunnerFromAlloc(t, false, alloc) - ctx.tr.MarkReceived() - defer ctx.tr.Destroy(structs.NewTaskEvent(structs.TaskKilled)) - defer ctx.allocDir.Destroy() - go ctx.tr.Run() - - select { - case <-ctx.tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - t.Fatalf("timeout") - } - - if len(ctx.upd.events) != 4 { - t.Fatalf("should have 4 updates: %#v", ctx.upd.events) - } - - if ctx.upd.state != structs.TaskStateDead { - t.Fatalf("TaskState %v; want %v", ctx.upd.state, structs.TaskStateDead) - } - - if ctx.upd.events[0].Type != structs.TaskReceived { - t.Fatalf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - if ctx.upd.events[1].Type != structs.TaskSetup { - t.Fatalf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskSetup) - } - - if ctx.upd.events[2].Type != structs.TaskStarted { - t.Fatalf("Third Event was %v; want %v", ctx.upd.events[2].Type, structs.TaskStarted) - } - - if ctx.upd.events[3].Type != structs.TaskTerminated { - t.Fatalf("Fourth Event was %v; want %v", ctx.upd.events[3].Type, structs.TaskTerminated) - } - - // Check that the file was written to disk properly - payloadPath := filepath.Join(ctx.tr.taskDir.LocalDir, fileName) - data, err := ioutil.ReadFile(payloadPath) - if err != nil { - t.Fatalf("Failed to read file: %v", err) - } - if !reflect.DeepEqual(data, expected) { - t.Fatalf("Bad; got %v; want %v", string(data), string(expected)) - } -} - -// TestTaskRunner_CleanupEmpty ensures TaskRunner works when createdResources -// is empty. -func TestTaskRunner_CleanupEmpty(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - - ctx := testTaskRunnerFromAlloc(t, false, alloc) - ctx.tr.MarkReceived() - - defer ctx.Cleanup() - ctx.tr.Run() - - // Since we only failed once, createdResources should be empty - if len(ctx.tr.createdResources.Resources) != 0 { - t.Fatalf("createdResources should still be empty: %v", ctx.tr.createdResources) - } -} - -func TestTaskRunner_CleanupOK(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - key := "ERR" - - ctx := testTaskRunnerFromAlloc(t, false, alloc) - ctx.tr.config.Options = map[string]string{ - "cleanup_fail_on": key, - "cleanup_fail_num": "1", - } - ctx.tr.MarkReceived() - - ctx.tr.createdResources.Resources[key] = []string{"x", "y"} - ctx.tr.createdResources.Resources["foo"] = []string{"z"} - - defer ctx.Cleanup() - ctx.tr.Run() - - // Since we only failed once, createdResources should be empty - if len(ctx.tr.createdResources.Resources) > 0 { - t.Fatalf("expected all created resources to be removed: %#v", ctx.tr.createdResources.Resources) - } -} - -func TestTaskRunner_CleanupFail(t *testing.T) { - t.Parallel() - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - key := "ERR" - ctx := testTaskRunnerFromAlloc(t, false, alloc) - ctx.tr.config.Options = map[string]string{ - "cleanup_fail_on": key, - "cleanup_fail_num": "5", - } - ctx.tr.MarkReceived() - - ctx.tr.createdResources.Resources[key] = []string{"x"} - ctx.tr.createdResources.Resources["foo"] = []string{"y", "z"} - - defer ctx.Cleanup() - ctx.tr.Run() - - // Since we failed > 3 times, the failed key should remain - expected := map[string][]string{key: {"x"}} - if !reflect.DeepEqual(expected, ctx.tr.createdResources.Resources) { - t.Fatalf("expected %#v but found: %#v", expected, ctx.tr.createdResources.Resources) - } -} - -func TestTaskRunner_Pre06ScriptCheck(t *testing.T) { - t.Parallel() - run := func(ver, driver, checkType string, exp bool) (string, func(t *testing.T)) { - name := fmt.Sprintf("%s %s %s returns %t", ver, driver, checkType, exp) - return name, func(t *testing.T) { - services := []*structs.Service{ - { - Checks: []*structs.ServiceCheck{ - { - Type: checkType, - }, - }, - }, - } - if act := pre06ScriptCheck(ver, driver, services); act != exp { - t.Errorf("expected %t received %t", exp, act) - } - } - } - t.Run(run("0.5.6", "exec", "script", true)) - t.Run(run("0.5.6", "java", "script", true)) - t.Run(run("0.5.6", "mock_driver", "script", true)) - t.Run(run("0.5.9", "exec", "script", true)) - t.Run(run("0.5.9", "java", "script", true)) - t.Run(run("0.5.9", "mock_driver", "script", true)) - - t.Run(run("0.6.0dev", "exec", "script", false)) - t.Run(run("0.6.0dev", "java", "script", false)) - t.Run(run("0.6.0dev", "mock_driver", "script", false)) - t.Run(run("0.6.0", "exec", "script", false)) - t.Run(run("0.6.0", "java", "script", false)) - t.Run(run("0.6.0", "mock_driver", "script", false)) - t.Run(run("1.0.0", "exec", "script", false)) - t.Run(run("1.0.0", "java", "script", false)) - t.Run(run("1.0.0", "mock_driver", "script", false)) - - t.Run(run("0.5.6", "rkt", "script", false)) - t.Run(run("0.5.6", "docker", "script", false)) - t.Run(run("0.5.6", "qemu", "script", false)) - t.Run(run("0.5.6", "raw_exec", "script", false)) - t.Run(run("0.5.6", "invalid", "script", false)) - - t.Run(run("0.5.6", "exec", "tcp", false)) - t.Run(run("0.5.6", "java", "tcp", false)) - t.Run(run("0.5.6", "mock_driver", "tcp", false)) -} - -func TestTaskRunner_interpolateServices(t *testing.T) { - t.Parallel() - task := &structs.Task{ - Services: []*structs.Service{ - { - Name: "${name}", - PortLabel: "${portlabel}", - Tags: []string{"${tags}"}, - Checks: []*structs.ServiceCheck{ - { - Name: "${checkname}", - Type: "${checktype}", - Command: "${checkcmd}", - Args: []string{"${checkarg}"}, - Path: "${checkstr}", - Protocol: "${checkproto}", - PortLabel: "${checklabel}", - InitialStatus: "${checkstatus}", - Method: "${checkmethod}", - Header: map[string][]string{ - "${checkheaderk}": {"${checkheaderv}"}, - }, - }, - }, - }, - }, - } - - env := &taskenv.TaskEnv{ - EnvMap: map[string]string{ - "name": "name", - "portlabel": "portlabel", - "tags": "tags", - "checkname": "checkname", - "checktype": "checktype", - "checkcmd": "checkcmd", - "checkarg": "checkarg", - "checkstr": "checkstr", - "checkpath": "checkpath", - "checkproto": "checkproto", - "checklabel": "checklabel", - "checkstatus": "checkstatus", - "checkmethod": "checkmethod", - "checkheaderk": "checkheaderk", - "checkheaderv": "checkheaderv", - }, - } - - interpTask := interpolateServices(env, task) - - exp := &structs.Task{ - Services: []*structs.Service{ - { - Name: "name", - PortLabel: "portlabel", - Tags: []string{"tags"}, - Checks: []*structs.ServiceCheck{ - { - Name: "checkname", - Type: "checktype", - Command: "checkcmd", - Args: []string{"checkarg"}, - Path: "checkstr", - Protocol: "checkproto", - PortLabel: "checklabel", - InitialStatus: "checkstatus", - Method: "checkmethod", - Header: map[string][]string{ - "checkheaderk": {"checkheaderv"}, - }, - }, - }, - }, - }, - } - - if diff := pretty.Diff(interpTask, exp); len(diff) > 0 { - t.Fatalf("diff:\n%s\n", strings.Join(diff, "\n")) - } -} - -func TestTaskRunner_ShutdownDelay(t *testing.T) { - t.Parallel() - - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Services[0].Tags = []string{"tag1"} - task.Services = task.Services[:1] // only need 1 for this test - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "1000s", - } - - // No shutdown escape hatch for this delay, so don't set it too high - task.ShutdownDelay = 500 * time.Duration(testutil.TestMultiplier()) * time.Millisecond - - ctx := testTaskRunnerFromAlloc(t, true, alloc) - ctx.tr.MarkReceived() - go ctx.tr.Run() - defer ctx.Cleanup() - - // Wait for the task to start - testWaitForTaskToStart(t, ctx) - - testutil.WaitForResult(func() (bool, error) { - services, _ := ctx.consul.Services() - if n := len(services); n != 1 { - return false, fmt.Errorf("expected 1 service found %d", n) - } - for _, s := range services { - if !reflect.DeepEqual(s.Tags, task.Services[0].Tags) { - return false, fmt.Errorf("expected tags=%q but found %q", - strings.Join(task.Services[0].Tags, ","), strings.Join(s.Tags, ",")) - } - } - return true, nil - }, func(err error) { - services, _ := ctx.consul.Services() - for _, s := range services { - t.Logf("Service: %#v", s) - } - t.Fatalf("err: %v", err) - }) - - // Begin the tear down - ctx.tr.Destroy(structs.NewTaskEvent(structs.TaskKilled)) - destroyed := time.Now() - - testutil.WaitForResult(func() (bool, error) { - services, _ := ctx.consul.Services() - if n := len(services); n == 1 { - return false, fmt.Errorf("expected 0 services found %d", n) - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - - // Wait for actual exit - select { - case <-ctx.tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - t.Fatalf("timeout") - } - - // It should be impossible to reach here in less time than the shutdown delay - if time.Now().Before(destroyed.Add(task.ShutdownDelay)) { - t.Fatalf("task exited before shutdown delay") - } -} - -// TestTaskRunner_CheckWatcher_Restart asserts that when enabled an unhealthy -// Consul check will cause a task to restart following restart policy rules. -func TestTaskRunner_CheckWatcher_Restart(t *testing.T) { - t.Parallel() - - alloc := mock.Alloc() - - // Make the restart policy fail within this test - tg := alloc.Job.TaskGroups[0] - tg.RestartPolicy.Attempts = 2 - tg.RestartPolicy.Interval = 1 * time.Minute - tg.RestartPolicy.Delay = 10 * time.Millisecond - tg.RestartPolicy.Mode = structs.RestartPolicyModeFail - - task := tg.Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "100s", - } - - // Make the task register a check that fails - task.Services[0].Checks[0] = &structs.ServiceCheck{ - Name: "test-restarts", - Type: structs.ServiceCheckTCP, - Interval: 50 * time.Millisecond, - CheckRestart: &structs.CheckRestart{ - Limit: 2, - Grace: 100 * time.Millisecond, - }, - } - - ctx := testTaskRunnerFromAlloc(t, true, alloc) - - // Replace mock Consul ServiceClient, with the real ServiceClient - // backed by a mock consul whose checks are always unhealthy. - consulAgent := consul.NewMockAgent() - consulAgent.SetStatus("critical") - consulClient := consul.NewServiceClient(consulAgent, testlog.HCLogger(t), true) - go consulClient.Run() - defer consulClient.Shutdown() - - ctx.tr.consul = consulClient - ctx.consul = nil // prevent accidental use of old mock - - ctx.tr.MarkReceived() - go ctx.tr.Run() - defer ctx.Cleanup() - - select { - case <-ctx.tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - t.Fatalf("timeout") - } - - expected := []string{ - "Received", - "Task Setup", - "Started", - "Restart Signaled", - "Killing", - "Killed", - "Restarting", - "Started", - "Restart Signaled", - "Killing", - "Killed", - "Restarting", - "Started", - "Restart Signaled", - "Killing", - "Killed", - "Not Restarting", - } - - if n := len(ctx.upd.events); n != len(expected) { - t.Fatalf("should have %d ctx.updates found %d: %s", len(expected), n, ctx.upd) - } - - if ctx.upd.state != structs.TaskStateDead { - t.Fatalf("TaskState %v; want %v", ctx.upd.state, structs.TaskStateDead) - } - - if !ctx.upd.failed { - t.Fatalf("expected failed") - } - - for i, actual := range ctx.upd.events { - if actual.Type != expected[i] { - t.Errorf("%.2d - Expected %q but found %q", i, expected[i], actual.Type) - } - } -} - -// TestTaskRunner_DriverNetwork asserts that a driver's network is properly -// used in services and checks. -func TestTaskRunner_DriverNetwork(t *testing.T) { - t.Parallel() - - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": 0, - "run_for": "100s", - "driver_ip": "10.1.2.3", - "driver_port_map": "http:80", - } - - // Create services and checks with custom address modes to exercise - // address detection logic - task.Services = []*structs.Service{ - { - Name: "host-service", - PortLabel: "http", - AddressMode: "host", - Checks: []*structs.ServiceCheck{ - { - Name: "driver-check", - Type: "tcp", - PortLabel: "1234", - AddressMode: "driver", - }, - }, - }, - { - Name: "driver-service", - PortLabel: "5678", - AddressMode: "driver", - Checks: []*structs.ServiceCheck{ - { - Name: "host-check", - Type: "tcp", - PortLabel: "http", - }, - { - Name: "driver-label-check", - Type: "tcp", - PortLabel: "http", - AddressMode: "driver", - }, - }, - }, - } - - ctx := testTaskRunnerFromAlloc(t, false, alloc) - ctx.tr.MarkReceived() - go ctx.tr.Run() - defer ctx.Cleanup() - - // Wait for the task to start - testWaitForTaskToStart(t, ctx) - - testutil.WaitForResult(func() (bool, error) { - services, _ := ctx.consul.Services() - if n := len(services); n != 2 { - return false, fmt.Errorf("expected 2 services, but found %d", n) - } - for _, s := range services { - switch s.Service { - case "host-service": - if expected := "192.168.0.100"; s.Address != expected { - return false, fmt.Errorf("expected host-service to have IP=%s but found %s", - expected, s.Address) - } - case "driver-service": - if expected := "10.1.2.3"; s.Address != expected { - return false, fmt.Errorf("expected driver-service to have IP=%s but found %s", - expected, s.Address) - } - if expected := 5678; s.Port != expected { - return false, fmt.Errorf("expected driver-service to have port=%d but found %d", - expected, s.Port) - } - default: - return false, fmt.Errorf("unexpected service: %q", s.Service) - } - - } - - checks := ctx.consul.CheckRegs() - if n := len(checks); n != 3 { - return false, fmt.Errorf("expected 3 checks, but found %d", n) - } - for _, check := range checks { - switch check.Name { - case "driver-check": - if expected := "10.1.2.3:1234"; check.TCP != expected { - return false, fmt.Errorf("expected driver-check to have address %q but found %q", expected, check.TCP) - } - case "driver-label-check": - if expected := "10.1.2.3:80"; check.TCP != expected { - return false, fmt.Errorf("expected driver-label-check to have address %q but found %q", expected, check.TCP) - } - case "host-check": - if expected := "192.168.0.100:"; !strings.HasPrefix(check.TCP, expected) { - return false, fmt.Errorf("expected host-check to have address start with %q but found %q", expected, check.TCP) - } - default: - return false, fmt.Errorf("unexpected check: %q", check.Name) - } - } - - return true, nil - }, func(err error) { - services, _ := ctx.consul.Services() - for _, s := range services { - t.Logf(pretty.Sprint("Service: ", s)) - } - for _, c := range ctx.consul.CheckRegs() { - t.Logf(pretty.Sprint("Check: ", c)) - } - t.Fatalf("error: %v", err) - }) -} diff --git a/client/allocrunnerdeprecated/taskrunner/task_runner_unix_test.go b/client/allocrunnerdeprecated/taskrunner/task_runner_unix_test.go deleted file mode 100644 index f911767f6256..000000000000 --- a/client/allocrunnerdeprecated/taskrunner/task_runner_unix_test.go +++ /dev/null @@ -1,73 +0,0 @@ -// +build deprecated,!windows - -package taskrunner - -import ( - "syscall" - "testing" - "time" - - "github.com/hashicorp/nomad/client/vaultclient" - "github.com/hashicorp/nomad/nomad/mock" - "github.com/hashicorp/nomad/nomad/structs" -) - -// This test is just to make sure we are resilient to failures when a restart or -// signal is triggered and the task is not running. -func TestTaskRunner_RestartSignalTask_NotRunning(t *testing.T) { - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "exit_code": "0", - "run_for": "100s", - } - - // Use vault to block the start - task.Vault = &structs.Vault{Policies: []string{"default"}} - - ctx := testTaskRunnerFromAlloc(t, true, alloc) - ctx.tr.MarkReceived() - defer ctx.Cleanup() - - // Control when we get a Vault token - token := "1234" - waitCh := make(chan struct{}) - defer close(waitCh) - handler := func(*structs.Allocation, []string) (map[string]string, error) { - <-waitCh - return map[string]string{task.Name: token}, nil - } - ctx.tr.vaultClient.(*vaultclient.MockVaultClient).DeriveTokenFn = handler - go ctx.tr.Run() - - select { - case <-ctx.tr.WaitCh(): - t.Fatalf("premature exit") - case <-time.After(1 * time.Second): - } - - // Send a signal and restart - if err := ctx.tr.Signal("test", "don't panic", syscall.SIGCHLD); err != nil { - t.Fatalf("Signalling errored: %v", err) - } - - // Send a restart - ctx.tr.Restart("test", "don't panic", false) - - if len(ctx.upd.events) != 2 { - t.Fatalf("should have 2 ctx.updates: %#v", ctx.upd.events) - } - - if ctx.upd.state != structs.TaskStatePending { - t.Fatalf("TaskState %v; want %v", ctx.upd.state, structs.TaskStatePending) - } - - if ctx.upd.events[0].Type != structs.TaskReceived { - t.Fatalf("First Event was %v; want %v", ctx.upd.events[0].Type, structs.TaskReceived) - } - - if ctx.upd.events[1].Type != structs.TaskSetup { - t.Fatalf("Second Event was %v; want %v", ctx.upd.events[1].Type, structs.TaskSetup) - } -} diff --git a/client/allocrunnerdeprecated/testing.go b/client/allocrunnerdeprecated/testing.go deleted file mode 100644 index 92a3b56d3bcc..000000000000 --- a/client/allocrunnerdeprecated/testing.go +++ /dev/null @@ -1,70 +0,0 @@ -// +build deprecated - -package allocrunner - -import ( - "io/ioutil" - "os" - "sync" - "testing" - - "github.com/boltdb/bolt" - "github.com/hashicorp/nomad/client/allocwatcher" - "github.com/hashicorp/nomad/client/config" - consulApi "github.com/hashicorp/nomad/client/consul" - "github.com/hashicorp/nomad/client/vaultclient" - "github.com/hashicorp/nomad/helper/testlog" - "github.com/hashicorp/nomad/nomad/mock" - "github.com/hashicorp/nomad/nomad/structs" -) - -type MockAllocStateUpdater struct { - Allocs []*structs.Allocation - mu sync.Mutex -} - -// Update fulfills the TaskStateUpdater interface -func (m *MockAllocStateUpdater) Update(alloc *structs.Allocation) { - m.mu.Lock() - m.Allocs = append(m.Allocs, alloc) - m.mu.Unlock() -} - -// Last returns a copy of the last alloc (or nil) sync'd -func (m *MockAllocStateUpdater) Last() *structs.Allocation { - m.mu.Lock() - defer m.mu.Unlock() - n := len(m.Allocs) - if n == 0 { - return nil - } - return m.Allocs[n-1].Copy() -} - -func TestAllocRunnerFromAlloc(t *testing.T, alloc *structs.Allocation, restarts bool) (*MockAllocStateUpdater, *AllocRunner) { - conf := config.DefaultConfig() - conf.Node = mock.Node() - conf.StateDir = os.TempDir() - conf.AllocDir = os.TempDir() - tmp, _ := ioutil.TempFile("", "state-db") - db, _ := bolt.Open(tmp.Name(), 0600, nil) - upd := &MockAllocStateUpdater{} - if !restarts { - *alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0} - alloc.Job.Type = structs.JobTypeBatch - } - vclient := vaultclient.NewMockVaultClient() - ar := NewAllocRunner(testlog.Logger(t), conf, db, upd.Update, alloc, vclient, consulApi.NewMockConsulServiceClient(t, testlog.HCLogger(t)), allocwatcher.NoopPrevAlloc{}) - return upd, ar -} - -func TestAllocRunner(t *testing.T, restarts bool) (*MockAllocStateUpdater, *AllocRunner) { - // Use mock driver - alloc := mock.Alloc() - task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" - task.Config = map[string]interface{}{ - "run_for": "500ms", - } - return TestAllocRunnerFromAlloc(t, alloc, restarts) -} From 694e3010c22f10240ff3ae713de7b7fe3edf530b Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Fri, 4 Jan 2019 16:11:25 -0500 Subject: [PATCH 3/6] use drivers.FSIsolation --- .../taskrunner/dispatch_hook_test.go | 7 ++--- .../allocrunner/taskrunner/task_dir_hook.go | 10 +++---- client/structs/structs.go | 29 ------------------- drivers/docker/config.go | 3 +- drivers/exec/driver.go | 2 +- drivers/java/driver.go | 4 +-- drivers/lxc/driver.go | 2 +- drivers/mock/driver.go | 2 +- drivers/qemu/driver.go | 2 +- drivers/rawexec/driver.go | 2 +- drivers/rkt/driver.go | 2 +- .../shared/executor/executor_linux_test.go | 3 +- drivers/shared/executor/executor_test.go | 3 +- plugins/drivers/client.go | 8 ++--- plugins/drivers/driver.go | 14 +++++++-- plugins/drivers/server.go | 7 ++--- plugins/drivers/testutils/testing.go | 6 +--- plugins/drivers/utils/utils.go | 8 ++--- 18 files changed, 42 insertions(+), 72 deletions(-) diff --git a/client/allocrunner/taskrunner/dispatch_hook_test.go b/client/allocrunner/taskrunner/dispatch_hook_test.go index ee7eb7d0f660..9ac683a146e2 100644 --- a/client/allocrunner/taskrunner/dispatch_hook_test.go +++ b/client/allocrunner/taskrunner/dispatch_hook_test.go @@ -9,7 +9,6 @@ import ( "github.com/golang/snappy" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/allocrunner/interfaces" - cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" @@ -34,7 +33,7 @@ func TestTaskRunner_DispatchHook_NoPayload(t *testing.T) { alloc := mock.BatchAlloc() task := alloc.Job.TaskGroups[0].Tasks[0] taskDir := allocDir.NewTaskDir(task.Name) - require.NoError(taskDir.Build(false, nil, cstructs.FSIsolationNone)) + require.NoError(taskDir.Build(false, nil)) h := newDispatchHook(alloc, logger) @@ -79,7 +78,7 @@ func TestTaskRunner_DispatchHook_Ok(t *testing.T) { File: "out", } taskDir := allocDir.NewTaskDir(task.Name) - require.NoError(taskDir.Build(false, nil, cstructs.FSIsolationNone)) + require.NoError(taskDir.Build(false, nil)) h := newDispatchHook(alloc, logger) @@ -123,7 +122,7 @@ func TestTaskRunner_DispatchHook_Error(t *testing.T) { File: "out", } taskDir := allocDir.NewTaskDir(task.Name) - require.NoError(taskDir.Build(false, nil, cstructs.FSIsolationNone)) + require.NoError(taskDir.Build(false, nil)) h := newDispatchHook(alloc, logger) diff --git a/client/allocrunner/taskrunner/task_dir_hook.go b/client/allocrunner/taskrunner/task_dir_hook.go index 9b18c8628758..9e54b40b6589 100644 --- a/client/allocrunner/taskrunner/task_dir_hook.go +++ b/client/allocrunner/taskrunner/task_dir_hook.go @@ -8,9 +8,9 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/allocrunner/interfaces" cconfig "github.com/hashicorp/nomad/client/config" - cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/plugins/drivers" ) type taskDirHook struct { @@ -44,7 +44,7 @@ func (h *taskDirHook) Prestart(ctx context.Context, req *interfaces.TaskPrestart // Build the task directory structure fsi := h.runner.driverCapabilities.FSIsolation - err := h.runner.taskDir.Build(fsi == cstructs.FSIsolationChroot, chroot) + err := h.runner.taskDir.Build(fsi == drivers.FSIsolationChroot, chroot) if err != nil { return err } @@ -56,10 +56,10 @@ func (h *taskDirHook) Prestart(ctx context.Context, req *interfaces.TaskPrestart } // setEnvvars sets path and host env vars depending on the FS isolation used. -func setEnvvars(envBuilder *taskenv.Builder, fsi cstructs.FSIsolation, taskDir *allocdir.TaskDir, conf *cconfig.Config) { +func setEnvvars(envBuilder *taskenv.Builder, fsi drivers.FSIsolation, taskDir *allocdir.TaskDir, conf *cconfig.Config) { // Set driver-specific environment variables switch fsi { - case cstructs.FSIsolationNone: + case drivers.FSIsolationNone: // Use host paths envBuilder.SetAllocDir(taskDir.SharedAllocDir) envBuilder.SetTaskLocalDir(taskDir.LocalDir) @@ -72,7 +72,7 @@ func setEnvvars(envBuilder *taskenv.Builder, fsi cstructs.FSIsolation, taskDir * } // Set the host environment variables for non-image based drivers - if fsi != cstructs.FSIsolationImage { + if fsi != drivers.FSIsolationImage { filter := strings.Split(conf.ReadDefault("env.blacklist", cconfig.DefaultEnvBlacklist), ",") envBuilder.SetHostEnvvars(filter) } diff --git a/client/structs/structs.go b/client/structs/structs.go index 6edcbf8d01f0..160c3f7272f0 100644 --- a/client/structs/structs.go +++ b/client/structs/structs.go @@ -272,35 +272,6 @@ func joinStringSet(s1, s2 []string) []string { return j } -// FSIsolation is an enumeration to describe what kind of filesystem isolation -// a driver supports. -type FSIsolation int - -const ( - // FSIsolationNone means no isolation. The host filesystem is used. - FSIsolationNone FSIsolation = 0 - - // FSIsolationChroot means the driver will use a chroot on the host - // filesystem. - FSIsolationChroot FSIsolation = 1 - - // FSIsolationImage means the driver uses an image. - FSIsolationImage FSIsolation = 2 -) - -func (f FSIsolation) String() string { - switch f { - case 0: - return "none" - case 1: - return "chroot" - case 2: - return "image" - default: - return "INVALID" - } -} - // DriverNetwork is the network created by driver's (eg Docker's bridge // network) during Prestart. type DriverNetwork struct { diff --git a/drivers/docker/config.go b/drivers/docker/config.go index fa3edb3ce8a0..52931b42dcfb 100644 --- a/drivers/docker/config.go +++ b/drivers/docker/config.go @@ -8,7 +8,6 @@ import ( docker "github.com/fsouza/go-dockerclient" hclog "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/plugins/base" "github.com/hashicorp/nomad/plugins/drivers" "github.com/hashicorp/nomad/plugins/shared/hclspec" @@ -295,7 +294,7 @@ var ( capabilities = &drivers.Capabilities{ SendSignals: true, Exec: true, - FSIsolation: structs.FSIsolationImage, + FSIsolation: drivers.FSIsolationImage, } ) diff --git a/drivers/exec/driver.go b/drivers/exec/driver.go index c9f4a404c105..8f12999e59cd 100644 --- a/drivers/exec/driver.go +++ b/drivers/exec/driver.go @@ -70,7 +70,7 @@ var ( capabilities = &drivers.Capabilities{ SendSignals: true, Exec: true, - FSIsolation: cstructs.FSIsolationChroot, + FSIsolation: drivers.FSIsolationChroot, } ) diff --git a/drivers/java/driver.go b/drivers/java/driver.go index fb712600a85c..9e063799cca0 100644 --- a/drivers/java/driver.go +++ b/drivers/java/driver.go @@ -81,7 +81,7 @@ var ( capabilities = &drivers.Capabilities{ SendSignals: false, Exec: false, - FSIsolation: cstructs.FSIsolationNone, + FSIsolation: drivers.FSIsolationNone, } _ drivers.DriverPlugin = (*Driver)(nil) @@ -89,7 +89,7 @@ var ( func init() { if runtime.GOOS == "linux" { - capabilities.FSIsolation = cstructs.FSIsolationChroot + capabilities.FSIsolation = drivers.FSIsolationChroot } } diff --git a/drivers/lxc/driver.go b/drivers/lxc/driver.go index 8226cfbe0833..f04601196ff8 100644 --- a/drivers/lxc/driver.go +++ b/drivers/lxc/driver.go @@ -109,7 +109,7 @@ var ( capabilities = &drivers.Capabilities{ SendSignals: false, Exec: false, - FSIsolation: cstructs.FSIsolationImage, + FSIsolation: drivers.FSIsolationImage, } ) diff --git a/drivers/mock/driver.go b/drivers/mock/driver.go index 89af73c85793..da4f10ff9555 100644 --- a/drivers/mock/driver.go +++ b/drivers/mock/driver.go @@ -87,7 +87,7 @@ var ( capabilities = &drivers.Capabilities{ SendSignals: false, Exec: true, - FSIsolation: cstructs.FSIsolationNone, + FSIsolation: drivers.FSIsolationNone, } ) diff --git a/drivers/qemu/driver.go b/drivers/qemu/driver.go index 275b26dd9ea4..f0f1dec58404 100644 --- a/drivers/qemu/driver.go +++ b/drivers/qemu/driver.go @@ -98,7 +98,7 @@ var ( capabilities = &drivers.Capabilities{ SendSignals: false, Exec: false, - FSIsolation: cstructs.FSIsolationImage, + FSIsolation: drivers.FSIsolationImage, } _ drivers.DriverPlugin = (*Driver)(nil) diff --git a/drivers/rawexec/driver.go b/drivers/rawexec/driver.go index fba3b39b74cb..9e0799111c1d 100644 --- a/drivers/rawexec/driver.go +++ b/drivers/rawexec/driver.go @@ -93,7 +93,7 @@ var ( capabilities = &drivers.Capabilities{ SendSignals: true, Exec: true, - FSIsolation: cstructs.FSIsolationNone, + FSIsolation: drivers.FSIsolationNone, } ) diff --git a/drivers/rkt/driver.go b/drivers/rkt/driver.go index fe4fbcccb575..4d3fdc3d74a2 100644 --- a/drivers/rkt/driver.go +++ b/drivers/rkt/driver.go @@ -124,7 +124,7 @@ var ( capabilities = &drivers.Capabilities{ SendSignals: true, Exec: true, - FSIsolation: cstructs.FSIsolationImage, + FSIsolation: drivers.FSIsolationImage, } reRktVersion = regexp.MustCompile(`rkt [vV]ersion[:]? (\d[.\d]+)`) diff --git a/drivers/shared/executor/executor_linux_test.go b/drivers/shared/executor/executor_linux_test.go index 7da6d6838a6f..94cc94c7a90f 100644 --- a/drivers/shared/executor/executor_linux_test.go +++ b/drivers/shared/executor/executor_linux_test.go @@ -13,7 +13,6 @@ import ( hclog "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/client/allocdir" - cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/client/testutil" "github.com/hashicorp/nomad/helper/testlog" @@ -60,7 +59,7 @@ func testExecutorCommandWithChroot(t *testing.T) (*ExecCommand, *allocdir.AllocD if err := allocDir.Build(); err != nil { t.Fatalf("AllocDir.Build() failed: %v", err) } - if err := allocDir.NewTaskDir(task.Name).Build(false, chrootEnv, cstructs.FSIsolationChroot); err != nil { + if err := allocDir.NewTaskDir(task.Name).Build(true, chrootEnv); err != nil { allocDir.Destroy() t.Fatalf("allocDir.NewTaskDir(%q) failed: %v", task.Name, err) } diff --git a/drivers/shared/executor/executor_test.go b/drivers/shared/executor/executor_test.go index f5794b75c2b4..ca9ae131da3e 100644 --- a/drivers/shared/executor/executor_test.go +++ b/drivers/shared/executor/executor_test.go @@ -15,7 +15,6 @@ import ( hclog "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/client/allocdir" - cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad/mock" @@ -45,7 +44,7 @@ func testExecutorCommand(t *testing.T) (*ExecCommand, *allocdir.AllocDir) { if err := allocDir.Build(); err != nil { t.Fatalf("AllocDir.Build() failed: %v", err) } - if err := allocDir.NewTaskDir(task.Name).Build(false, nil, cstructs.FSIsolationNone); err != nil { + if err := allocDir.NewTaskDir(task.Name).Build(false, nil); err != nil { allocDir.Destroy() t.Fatalf("allocDir.NewTaskDir(%q) failed: %v", task.Name, err) } diff --git a/plugins/drivers/client.go b/plugins/drivers/client.go index 98adc86328f8..3ba9b5600165 100644 --- a/plugins/drivers/client.go +++ b/plugins/drivers/client.go @@ -56,13 +56,13 @@ func (d *driverPluginClient) Capabilities() (*Capabilities, error) { switch resp.Capabilities.FsIsolation { case proto.DriverCapabilities_NONE: - caps.FSIsolation = cstructs.FSIsolationNone + caps.FSIsolation = FSIsolationNone case proto.DriverCapabilities_CHROOT: - caps.FSIsolation = cstructs.FSIsolationChroot + caps.FSIsolation = FSIsolationChroot case proto.DriverCapabilities_IMAGE: - caps.FSIsolation = cstructs.FSIsolationImage + caps.FSIsolation = FSIsolationImage default: - caps.FSIsolation = cstructs.FSIsolationNone + caps.FSIsolation = FSIsolationNone } } diff --git a/plugins/drivers/driver.go b/plugins/drivers/driver.go index fb2802e53518..d0d8afa9e4f6 100644 --- a/plugins/drivers/driver.go +++ b/plugins/drivers/driver.go @@ -87,12 +87,20 @@ type Fingerprint struct { Err error } +// FSIsolation is an enumeration to describe what kind of filesystem isolation +// a driver supports. type FSIsolation string var ( - FSIsolationNone = FSIsolation("none") + // FSIsolationNone means no isolation. The host filesystem is used. + FSIsolationNone = FSIsolation("none") + + // FSIsolationChroot means the driver will use a chroot on the host + // filesystem. FSIsolationChroot = FSIsolation("chroot") - FSIsolationImage = FSIsolation("image") + + // FSIsolationImage means the driver uses an image. + FSIsolationImage = FSIsolation("image") ) type Capabilities struct { @@ -104,7 +112,7 @@ type Capabilities struct { Exec bool //FSIsolation indicates what kind of filesystem isolation the driver supports. - FSIsolation cstructs.FSIsolation + FSIsolation FSIsolation } type TaskConfig struct { diff --git a/plugins/drivers/server.go b/plugins/drivers/server.go index 337d78fcc663..0d4e184d5254 100644 --- a/plugins/drivers/server.go +++ b/plugins/drivers/server.go @@ -7,7 +7,6 @@ import ( "github.com/golang/protobuf/ptypes" hclog "github.com/hashicorp/go-hclog" plugin "github.com/hashicorp/go-plugin" - cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/drivers/proto" dstructs "github.com/hashicorp/nomad/plugins/shared/structs" @@ -48,11 +47,11 @@ func (b *driverPluginServer) Capabilities(ctx context.Context, req *proto.Capabi } switch caps.FSIsolation { - case cstructs.FSIsolationNone: + case FSIsolationNone: resp.Capabilities.FsIsolation = proto.DriverCapabilities_NONE - case cstructs.FSIsolationChroot: + case FSIsolationChroot: resp.Capabilities.FsIsolation = proto.DriverCapabilities_CHROOT - case cstructs.FSIsolationImage: + case FSIsolationImage: resp.Capabilities.FsIsolation = proto.DriverCapabilities_IMAGE default: resp.Capabilities.FsIsolation = proto.DriverCapabilities_NONE diff --git a/plugins/drivers/testutils/testing.go b/plugins/drivers/testutils/testing.go index f4c54702ff24..243dff769892 100644 --- a/plugins/drivers/testutils/testing.go +++ b/plugins/drivers/testutils/testing.go @@ -99,12 +99,8 @@ func (h *DriverHarness) MkAllocDir(t *drivers.TaskConfig, enableLogs bool) func( caps, err := h.Capabilities() require.NoError(h.t, err) - var entries map[string]string fsi := caps.FSIsolation - if fsi == cstructs.FSIsolationChroot { - entries = config.DefaultChrootEnv - } - require.NoError(h.t, taskDir.Build(false, entries, fsi)) + require.NoError(h.t, taskDir.Build(fsi == drivers.FSIsolationChroot, config.DefaultChrootEnv)) task := &structs.Task{ Name: t.Name, diff --git a/plugins/drivers/utils/utils.go b/plugins/drivers/utils/utils.go index 1af2c7e789f4..90ec0cada5fb 100644 --- a/plugins/drivers/utils/utils.go +++ b/plugins/drivers/utils/utils.go @@ -5,16 +5,16 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" - cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/plugins/drivers" ) // SetEnvvars sets path and host env vars depending on the FS isolation used. -func SetEnvvars(envBuilder *taskenv.Builder, fsi cstructs.FSIsolation, taskDir *allocdir.TaskDir, conf *config.Config) { +func SetEnvvars(envBuilder *taskenv.Builder, fsi drivers.FSIsolation, taskDir *allocdir.TaskDir, conf *config.Config) { // Set driver-specific environment variables switch fsi { - case cstructs.FSIsolationNone: + case drivers.FSIsolationNone: // Use host paths envBuilder.SetAllocDir(taskDir.SharedAllocDir) envBuilder.SetTaskLocalDir(taskDir.LocalDir) @@ -27,7 +27,7 @@ func SetEnvvars(envBuilder *taskenv.Builder, fsi cstructs.FSIsolation, taskDir * } // Set the host environment variables for non-image based drivers - if fsi != cstructs.FSIsolationImage { + if fsi != drivers.FSIsolationImage { filter := strings.Split(conf.ReadDefault("env.blacklist", config.DefaultEnvBlacklist), ",") envBuilder.SetHostEnvvars(filter) } From c0162fab35d1f5307e8f46a08f02b983a0af1c16 Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Fri, 4 Jan 2019 18:01:35 -0500 Subject: [PATCH 4/6] move cstructs.DeviceNetwork to drivers pkg --- .../allocrunner/interfaces/task_lifecycle.go | 2 +- .../allocrunner/taskrunner/driver_handle.go | 6 +- client/allocrunner/taskrunner/service_hook.go | 4 +- client/allocrunner/taskrunner/state/state.go | 3 +- client/allocrunner/taskrunner/task_runner.go | 2 +- client/state/08types.go | 3 +- client/structs/structs.go | 57 ----------------- client/taskenv/env.go | 10 +-- client/taskenv/env_test.go | 10 +-- command/agent/consul/client.go | 4 +- command/agent/consul/structs.go | 6 +- command/agent/consul/unit_test.go | 27 ++++---- drivers/docker/driver.go | 4 +- drivers/docker/handle.go | 4 +- drivers/exec/driver.go | 2 +- drivers/java/driver.go | 2 +- drivers/lxc/driver.go | 2 +- drivers/mock/driver.go | 4 +- drivers/qemu/driver.go | 6 +- drivers/rawexec/driver.go | 2 +- drivers/rkt/driver.go | 8 +-- plugins/drivers/client.go | 8 +-- plugins/drivers/driver.go | 61 ++++++++++++++++++- plugins/drivers/testutils/testing.go | 4 +- plugins/drivers/testutils/testing_test.go | 5 +- 25 files changed, 121 insertions(+), 125 deletions(-) diff --git a/client/allocrunner/interfaces/task_lifecycle.go b/client/allocrunner/interfaces/task_lifecycle.go index 61454f7c87fc..f8d50f7a54d0 100644 --- a/client/allocrunner/interfaces/task_lifecycle.go +++ b/client/allocrunner/interfaces/task_lifecycle.go @@ -96,7 +96,7 @@ type TaskPoststartRequest struct { DriverExec interfaces.ScriptExecutor // Network info (may be nil) - DriverNetwork *cstructs.DriverNetwork + DriverNetwork *drivers.DriverNetwork // TaskEnv is the task's environment TaskEnv *taskenv.TaskEnv diff --git a/client/allocrunner/taskrunner/driver_handle.go b/client/allocrunner/taskrunner/driver_handle.go index 0d3c076582c1..ed4762f77301 100644 --- a/client/allocrunner/taskrunner/driver_handle.go +++ b/client/allocrunner/taskrunner/driver_handle.go @@ -10,7 +10,7 @@ import ( ) // NewDriverHandle returns a handle for task operations on a specific task -func NewDriverHandle(driver drivers.DriverPlugin, taskID string, task *structs.Task, net *cstructs.DriverNetwork) *DriverHandle { +func NewDriverHandle(driver drivers.DriverPlugin, taskID string, task *structs.Task, net *drivers.DriverNetwork) *DriverHandle { return &DriverHandle{ driver: driver, net: net, @@ -23,7 +23,7 @@ func NewDriverHandle(driver drivers.DriverPlugin, taskID string, task *structs.T // an api to perform driver operations on the task type DriverHandle struct { driver drivers.DriverPlugin - net *cstructs.DriverNetwork + net *drivers.DriverNetwork task *structs.Task taskID string } @@ -61,6 +61,6 @@ func (h *DriverHandle) Exec(timeout time.Duration, cmd string, args []string) ([ return res.Stdout, res.ExitResult.ExitCode, res.ExitResult.Err } -func (h *DriverHandle) Network() *cstructs.DriverNetwork { +func (h *DriverHandle) Network() *drivers.DriverNetwork { return h.net } diff --git a/client/allocrunner/taskrunner/service_hook.go b/client/allocrunner/taskrunner/service_hook.go index 0c1d7368a3a4..3c749f1e3b4c 100644 --- a/client/allocrunner/taskrunner/service_hook.go +++ b/client/allocrunner/taskrunner/service_hook.go @@ -10,10 +10,10 @@ import ( "github.com/hashicorp/nomad/client/allocrunner/interfaces" tinterfaces "github.com/hashicorp/nomad/client/allocrunner/taskrunner/interfaces" "github.com/hashicorp/nomad/client/consul" - cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/client/taskenv" agentconsul "github.com/hashicorp/nomad/command/agent/consul" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/plugins/drivers" ) type serviceHookConfig struct { @@ -37,7 +37,7 @@ type serviceHook struct { // The following fields may be updated delay time.Duration driverExec tinterfaces.ScriptExecutor - driverNet *cstructs.DriverNetwork + driverNet *drivers.DriverNetwork canary bool services []*structs.Service networks structs.Networks diff --git a/client/allocrunner/taskrunner/state/state.go b/client/allocrunner/taskrunner/state/state.go index ba4879eb341d..f5e6dfba3e98 100644 --- a/client/allocrunner/taskrunner/state/state.go +++ b/client/allocrunner/taskrunner/state/state.go @@ -1,7 +1,6 @@ package state import ( - "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/plugins/drivers" ) @@ -13,7 +12,7 @@ type LocalState struct { // DriverNetwork is the network information returned by the task // driver's Start method - DriverNetwork *structs.DriverNetwork + DriverNetwork *drivers.DriverNetwork // TaskHandle is the handle used to reattach to the task during recovery TaskHandle *drivers.TaskHandle diff --git a/client/allocrunner/taskrunner/task_runner.go b/client/allocrunner/taskrunner/task_runner.go index e120f0ca5b0a..2f9cc54afa32 100644 --- a/client/allocrunner/taskrunner/task_runner.go +++ b/client/allocrunner/taskrunner/task_runner.go @@ -781,7 +781,7 @@ func (tr *TaskRunner) Restore() error { // restoreHandle ensures a TaskHandle is valid by calling Driver.RecoverTask // and sets the driver handle. If the TaskHandle is not valid, DestroyTask is // called. -func (tr *TaskRunner) restoreHandle(taskHandle *drivers.TaskHandle, net *cstructs.DriverNetwork) (success bool) { +func (tr *TaskRunner) restoreHandle(taskHandle *drivers.TaskHandle, net *drivers.DriverNetwork) (success bool) { // Ensure handle is well-formed if taskHandle.Config == nil { return true diff --git a/client/state/08types.go b/client/state/08types.go index 758e7ef6e4c0..ce047821357f 100644 --- a/client/state/08types.go +++ b/client/state/08types.go @@ -2,7 +2,6 @@ package state import ( "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state" - cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/drivers" ) @@ -36,7 +35,7 @@ type taskRunnerState08 struct { ArtifactDownloaded bool TaskDirBuilt bool PayloadRendered bool - DriverNetwork *cstructs.DriverNetwork + DriverNetwork *drivers.DriverNetwork // Created Resources are no longer used. //CreatedResources *driver.CreatedResources } diff --git a/client/structs/structs.go b/client/structs/structs.go index 160c3f7272f0..f57c1d67c2f4 100644 --- a/client/structs/structs.go +++ b/client/structs/structs.go @@ -3,10 +3,7 @@ package structs //go:generate codecgen -d 102 -o structs.generated.go structs.go import ( - "crypto/md5" "errors" - "io" - "strconv" "time" "github.com/hashicorp/nomad/client/stats" @@ -272,60 +269,6 @@ func joinStringSet(s1, s2 []string) []string { return j } -// DriverNetwork is the network created by driver's (eg Docker's bridge -// network) during Prestart. -type DriverNetwork struct { - // PortMap can be set by drivers to replace ports in environment - // variables with driver-specific mappings. - PortMap map[string]int - - // IP is the IP address for the task created by the driver. - IP string - - // AutoAdvertise indicates whether the driver thinks services that - // choose to auto-advertise-addresses should use this IP instead of the - // host's. eg If a Docker network plugin is used - AutoAdvertise bool -} - -// Advertise returns true if the driver suggests using the IP set. May be -// called on a nil Network in which case it returns false. -func (d *DriverNetwork) Advertise() bool { - return d != nil && d.AutoAdvertise -} - -// Copy a DriverNetwork struct. If it is nil, nil is returned. -func (d *DriverNetwork) Copy() *DriverNetwork { - if d == nil { - return nil - } - pm := make(map[string]int, len(d.PortMap)) - for k, v := range d.PortMap { - pm[k] = v - } - return &DriverNetwork{ - PortMap: pm, - IP: d.IP, - AutoAdvertise: d.AutoAdvertise, - } -} - -// Hash the contents of a DriverNetwork struct to detect changes. If it is nil, -// an empty slice is returned. -func (d *DriverNetwork) Hash() []byte { - if d == nil { - return []byte{} - } - h := md5.New() - io.WriteString(h, d.IP) - io.WriteString(h, strconv.FormatBool(d.AutoAdvertise)) - for k, v := range d.PortMap { - io.WriteString(h, k) - io.WriteString(h, strconv.Itoa(v)) - } - return h.Sum(nil) -} - // HealthCheckRequest is the request type for a type that fulfils the Health // Check interface type HealthCheckRequest struct{} diff --git a/client/taskenv/env.go b/client/taskenv/env.go index 33dac7855d23..411cef3dc096 100644 --- a/client/taskenv/env.go +++ b/client/taskenv/env.go @@ -8,10 +8,10 @@ import ( "strings" "sync" - cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper" hargs "github.com/hashicorp/nomad/helper/args" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/plugins/drivers" "github.com/zclconf/go-cty/cty" ) @@ -304,7 +304,7 @@ type Builder struct { // driverNetwork is the network defined by the driver (or nil if none // was defined). - driverNetwork *cstructs.DriverNetwork + driverNetwork *drivers.DriverNetwork // network resources from the task; must be lazily turned into env vars // because portMaps and advertiseIP can change after builder creation @@ -665,7 +665,7 @@ func (b *Builder) SetSecretsDir(dir string) *Builder { } // SetDriverNetwork defined by the driver. -func (b *Builder) SetDriverNetwork(n *cstructs.DriverNetwork) *Builder { +func (b *Builder) SetDriverNetwork(n *drivers.DriverNetwork) *Builder { ncopy := n.Copy() b.mu.Lock() b.driverNetwork = ncopy @@ -682,7 +682,7 @@ func (b *Builder) SetDriverNetwork(n *cstructs.DriverNetwork) *Builder { // // Task: NOMAD_TASK_{IP,PORT,ADDR}__