From f255464bf41aee37e2461fb277a074f4863d4aa6 Mon Sep 17 00:00:00 2001 From: Cameron Davison Date: Sun, 10 Jul 2016 16:34:07 -0500 Subject: [PATCH 1/2] if policy mode is delay, do not fail for multiple startup failures, delay instead --- client/restarts.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/client/restarts.go b/client/restarts.go index c794c0414afa..e45f135f7e9f 100644 --- a/client/restarts.go +++ b/client/restarts.go @@ -132,9 +132,15 @@ func (r *RestartTracker) handleStartError() (string, time.Duration) { } if r.count > r.policy.Attempts { - r.reason = fmt.Sprintf("Exceeded allowed attempts %d in interval %v", - r.policy.Attempts, r.policy.Interval) - return structs.TaskNotRestarting, 0 + if r.policy.Mode == structs.RestartPolicyModeFail { + r.reason = fmt.Sprintf( + `Exceeded allowed atttempts %d in interval %v and mode is "fail"`, + r.policy.Attempts, r.policy.Interval) + return structs.TaskNotRestarting, 0 + } else { + r.reason = ReasonDelay + return structs.TaskRestarting, r.getDelay() + } } r.reason = ReasonWithinPolicy From f88432427ae204d53f4fada237a5bede2bba960a Mon Sep 17 00:00:00 2001 From: Cameron Davison Date: Mon, 11 Jul 2016 20:54:36 -0500 Subject: [PATCH 2/2] test policy delay for startup error --- client/restarts_test.go | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/client/restarts_test.go b/client/restarts_test.go index e46a73334975..f559f8a9b712 100644 --- a/client/restarts_test.go +++ b/client/restarts_test.go @@ -94,9 +94,9 @@ func TestClient_RestartTracker_ZeroAttempts(t *testing.T) { } } -func TestClient_RestartTracker_StartError_Recoverable(t *testing.T) { +func TestClient_RestartTracker_StartError_Recoverable_Fail(t *testing.T) { t.Parallel() - p := testPolicy(true, structs.RestartPolicyModeDelay) + p := testPolicy(true, structs.RestartPolicyModeFail) rt := newRestartTracker(p, structs.JobTypeSystem) recErr := cstructs.NewRecoverableError(fmt.Errorf("foo"), true) for i := 0; i < p.Attempts; i++ { @@ -114,3 +114,28 @@ func TestClient_RestartTracker_StartError_Recoverable(t *testing.T) { t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskNotRestarting) } } + +func TestClient_RestartTracker_StartError_Recoverable_Delay(t *testing.T) { + t.Parallel() + p := testPolicy(true, structs.RestartPolicyModeDelay) + rt := newRestartTracker(p, structs.JobTypeSystem) + recErr := cstructs.NewRecoverableError(fmt.Errorf("foo"), true) + for i := 0; i < p.Attempts; i++ { + state, when := rt.SetStartError(recErr).GetState() + if state != structs.TaskRestarting { + t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting) + } + if !withinJitter(p.Delay, when) { + t.Fatalf("NextRestart() returned %v; want %v+jitter", when, p.Delay) + } + } + + // Next restart should cause delay + state, when := rt.SetStartError(recErr).GetState() + if state != structs.TaskRestarting { + t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskRestarting) + } + if !(when > p.Delay && when <= p.Interval) { + t.Fatalf("NextRestart() returned %v; want > %v and <= %v", when, p.Delay, p.Interval) + } +}