Skip to content

Commit

Permalink
Don't fail other tasks when retrying artifact get
Browse files Browse the repository at this point in the history
The artifact fetching may be retried and succeed, so don't set the task
as dead.

Fixes #1558
  • Loading branch information
schmichael committed Aug 25, 2016
1 parent 4a3eee9 commit d1da2ce
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 4 deletions.
47 changes: 44 additions & 3 deletions client/alloc_runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,24 @@ func (m *MockAllocStateUpdater) Update(alloc *structs.Allocation) {
m.Allocs = append(m.Allocs, alloc)
}

func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
func testAllocRunnerFromAlloc(alloc *structs.Allocation, restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
logger := testLogger()
conf := config.DefaultConfig()
conf.StateDir = os.TempDir()
conf.AllocDir = os.TempDir()
upd := &MockAllocStateUpdater{}
alloc := mock.Alloc()
if !restarts {
*alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0}
alloc.Job.Type = structs.JobTypeBatch
}

ar := NewAllocRunner(logger, conf, upd.Update, alloc)
return upd, ar
}

func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
return testAllocRunnerFromAlloc(mock.Alloc(), restarts)
}

func TestAllocRunner_SimpleRun(t *testing.T) {
ctestutil.ExecCompatible(t)
upd, ar := testAllocRunner(false)
Expand All @@ -61,6 +63,45 @@ func TestAllocRunner_SimpleRun(t *testing.T) {
})
}

// TestAllocRuner_RetryArtifact ensures that if one task in a task group is
// retrying fetching an artifact, other tasks in the the group should be able
// to proceed. See #1558
func TestAllocRunner_RetryArtifact(t *testing.T) {
ctestutil.ExecCompatible(t)

alloc := mock.Alloc()

// Create a copy of the task for testing #1558
badtask := alloc.Job.TaskGroups[0].Tasks[0].Copy()
badtask.Name = "bad"

// Add a bad artifact to one of the tasks
badtask.Artifacts = []*structs.TaskArtifact{
{GetterSource: "http://127.1.1.111:12315/foo/bar/baz"},
}

alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, badtask)
upd, ar := testAllocRunnerFromAlloc(alloc, true)
go ar.Run()
defer ar.Destroy()

testutil.WaitForResult(func() (bool, error) {
if upd.Count < 6 {
return false, fmt.Errorf("Not enough updates")
}
last := upd.Allocs[upd.Count-1]
if last.TaskStates["web"].State != structs.TaskStatePending {
return false, fmt.Errorf("expected web to be pending but found %q", last.TaskStates["web"].State)
}
if last.TaskStates["bad"].State != structs.TaskStatePending {
return false, fmt.Errorf("expected bad to be pending but found %q", last.TaskStates["web"].State)
}
return true, nil
}, func(err error) {
t.Fatalf("err: %v", err)
})
}

func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
ctestutil.ExecCompatible(t)
upd, ar := testAllocRunner(false)
Expand Down
2 changes: 1 addition & 1 deletion client/task_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ func (r *TaskRunner) run() {

for _, artifact := range r.task.Artifacts {
if err := getter.GetArtifact(r.taskEnv, artifact, taskDir); err != nil {
r.setState(structs.TaskStateDead,
r.setState(structs.TaskStatePending,
structs.NewTaskEvent(structs.TaskArtifactDownloadFailed).SetDownloadError(err))
r.restartTracker.SetStartError(dstructs.NewRecoverableError(err, true))
goto RESTART
Expand Down

0 comments on commit d1da2ce

Please sign in to comment.