Skip to content

Commit

Permalink
implement alloc runner task restart hook
Browse files Browse the repository at this point in the history
Most allocation hooks don't need to know when a single task within the
allocation is restarted. The check watcher for group services triggers the
alloc runner to restart all tasks, but the alloc runner's `Restart` method
doesn't trigger any of the alloc hooks, including the group service hook. The
result is that after the first time a check triggers a restart, we'll never
restart the tasks of an allocation again.

This commit adds a `RunnerTaskRestartHook` interface so that alloc runner
hooks can act if a task within the alloc is restarted. The only implementation
is in the group service hook, which will force a re-registration of the
alloc's services and fix check restarts.
  • Loading branch information
tgross committed Jan 21, 2021
1 parent afa0018 commit 21dbd38
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 0 deletions.
7 changes: 7 additions & 0 deletions client/allocrunner/alloc_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -1138,13 +1138,17 @@ func (ar *allocRunner) Restart(ctx context.Context, event *structs.TaskEvent, fa
var err *multierror.Error
var errMutex sync.Mutex

// run alloc task restart hooks
ar.taskRestartHooks()

go func() {
var wg sync.WaitGroup
defer close(waitCh)
for tn, tr := range ar.tasks {
wg.Add(1)
go func(taskName string, r agentconsul.WorkloadRestarter) {
defer wg.Done()

e := r.Restart(ctx, event, failure)
if e != nil {
errMutex.Lock()
Expand All @@ -1170,6 +1174,9 @@ func (ar *allocRunner) Restart(ctx context.Context, event *structs.TaskEvent, fa
func (ar *allocRunner) RestartAll(taskEvent *structs.TaskEvent) error {
var err *multierror.Error

// run alloc task restart hooks
ar.taskRestartHooks()

for tn := range ar.tasks {
rerr := ar.RestartTask(tn, taskEvent.Copy())
if rerr != nil {
Expand Down
25 changes: 25 additions & 0 deletions client/allocrunner/alloc_runner_hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -360,3 +360,28 @@ func (ar *allocRunner) shutdownHooks() {
}
}
}

func (ar *allocRunner) taskRestartHooks() {
for _, hook := range ar.runnerHooks {
re, ok := hook.(interfaces.RunnerTaskRestartHook)
if !ok {
continue
}

name := re.Name()
var start time.Time
if ar.logger.IsTrace() {
start = time.Now()
ar.logger.Trace("running alloc task restart hook",
"name", name, "start", start)
}

re.PreTaskRestart()

if ar.logger.IsTrace() {
end := time.Now()
ar.logger.Trace("finished alloc task restart hook",
"name", name, "end", end, "duration", end.Sub(start))
}
}
}
8 changes: 8 additions & 0 deletions client/allocrunner/groupservice_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,14 @@ func (h *groupServiceHook) Update(req *interfaces.RunnerUpdateRequest) error {
return h.consulClient.UpdateWorkload(oldWorkloadServices, newWorkloadServices)
}

func (h *groupServiceHook) PreTaskRestart() error {
// TODO: refactor these hooks so that we can call both of them with a
// single mutex to lock both, so that we don't end up having a narrow race
// condition for Update calls
h.PreKill()
return h.Prerun()
}

func (h *groupServiceHook) PreKill() {
h.mu.Lock()
defer h.mu.Unlock()
Expand Down
8 changes: 8 additions & 0 deletions client/allocrunner/interfaces/runner_lifecycle.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@ type RunnerUpdateRequest struct {
Alloc *structs.Allocation
}

// RunnerTaskRestartHooks are executed just before the allocation runner is
// going to restart all tasks.
type RunnerTaskRestartHook interface {
RunnerHook

PreTaskRestart() error
}

// ShutdownHook may be implemented by AllocRunner or TaskRunner hooks and will
// be called when the agent process is being shutdown gracefully.
type ShutdownHook interface {
Expand Down

0 comments on commit 21dbd38

Please sign in to comment.