Skip to content

Commit

Permalink
Merge pull request #1653 from hashicorp/b-fix-artifact-retry
Browse files Browse the repository at this point in the history
Don't fail other tasks when retrying artifact get
  • Loading branch information
schmichael committed Aug 26, 2016
2 parents 5b9c2ea + 5b0dd7f commit 40e1ffd
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 4 deletions.
61 changes: 58 additions & 3 deletions client/alloc_runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,24 @@ func (m *MockAllocStateUpdater) Update(alloc *structs.Allocation) {
m.Allocs = append(m.Allocs, alloc)
}

func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
func testAllocRunnerFromAlloc(alloc *structs.Allocation, restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
logger := testLogger()
conf := config.DefaultConfig()
conf.StateDir = os.TempDir()
conf.AllocDir = os.TempDir()
upd := &MockAllocStateUpdater{}
alloc := mock.Alloc()
if !restarts {
*alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0}
alloc.Job.Type = structs.JobTypeBatch
}

ar := NewAllocRunner(logger, conf, upd.Update, alloc)
return upd, ar
}

func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
return testAllocRunnerFromAlloc(mock.Alloc(), restarts)
}

func TestAllocRunner_SimpleRun(t *testing.T) {
ctestutil.ExecCompatible(t)
upd, ar := testAllocRunner(false)
Expand All @@ -61,6 +63,59 @@ func TestAllocRunner_SimpleRun(t *testing.T) {
})
}

// TestAllocRuner_RetryArtifact ensures that if one task in a task group is
// retrying fetching an artifact, other tasks in the the group should be able
// to proceed.
func TestAllocRunner_RetryArtifact(t *testing.T) {
ctestutil.ExecCompatible(t)

alloc := mock.Alloc()
alloc.Job.Type = structs.JobTypeBatch
alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 1
alloc.Job.TaskGroups[0].RestartPolicy.Delay = time.Duration(4*testutil.TestMultiplier()) * time.Second

// Create a new task with a bad artifact
badtask := alloc.Job.TaskGroups[0].Tasks[0].Copy()
badtask.Name = "bad"
badtask.Artifacts = []*structs.TaskArtifact{
{GetterSource: "http://127.1.1.111:12315/foo/bar/baz"},
}

alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, badtask)
upd, ar := testAllocRunnerFromAlloc(alloc, true)
go ar.Run()
defer ar.Destroy()

testutil.WaitForResult(func() (bool, error) {
if upd.Count < 6 {
return false, fmt.Errorf("Not enough updates")
}
last := upd.Allocs[upd.Count-1]

// web task should have completed successfully while bad task
// retries artififact fetching
webstate := last.TaskStates["web"]
if webstate.State != structs.TaskStateDead {
return false, fmt.Errorf("expected web to be dead but found %q", last.TaskStates["web"].State)
}
if !webstate.Successful() {
return false, fmt.Errorf("expected web to have exited successfully")
}

// bad task should have failed
badstate := last.TaskStates["bad"]
if badstate.State != structs.TaskStateDead {
return false, fmt.Errorf("expected bad to be dead but found %q", last.TaskStates["web"].State)
}
if !badstate.Failed() {
return false, fmt.Errorf("expected bad to have failed")
}
return true, nil
}, func(err error) {
t.Fatalf("err: %v", err)
})
}

func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
ctestutil.ExecCompatible(t)
upd, ar := testAllocRunner(false)
Expand Down
2 changes: 1 addition & 1 deletion client/task_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ func (r *TaskRunner) run() {

for _, artifact := range r.task.Artifacts {
if err := getter.GetArtifact(r.taskEnv, artifact, taskDir); err != nil {
r.setState(structs.TaskStateDead,
r.setState(structs.TaskStatePending,
structs.NewTaskEvent(structs.TaskArtifactDownloadFailed).SetDownloadError(err))
r.restartTracker.SetStartError(dstructs.NewRecoverableError(err, true))
goto RESTART
Expand Down

0 comments on commit 40e1ffd

Please sign in to comment.