From 14d46f579d1bbfde20c183416f4878f8c1a90ac5 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Thu, 26 Apr 2018 16:57:45 -0400 Subject: [PATCH 1/5] handle potential panic in cron parsing --- nomad/job_endpoint.go | 5 ++++- nomad/leader.go | 5 ++++- nomad/periodic.go | 10 ++++++++-- nomad/structs/structs.go | 23 +++++++++++++++++------ 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 03ca374429b3..9923b995f79e 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -1217,7 +1217,10 @@ func (j *Job) Plan(args *structs.JobPlanRequest, reply *structs.JobPlanResponse) // If it is a periodic job calculate the next launch if args.Job.IsPeriodic() && args.Job.Periodic.Enabled { - reply.NextPeriodicLaunch = args.Job.Periodic.Next(time.Now().In(args.Job.Periodic.GetLocation())) + reply.NextPeriodicLaunch, err = args.Job.Periodic.Next(time.Now().In(args.Job.Periodic.GetLocation())) + if err != nil { + return fmt.Errorf("Failed to parse cron time %v", err) + } } reply.FailedTGAllocs = updatedEval.FailedTGAllocs diff --git a/nomad/leader.go b/nomad/leader.go index f65a22477727..ead72778ed5c 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -400,7 +400,10 @@ func (s *Server) restorePeriodicDispatcher() error { } // nextLaunch is the next launch that should occur. - nextLaunch := job.Periodic.Next(launch.Launch.In(job.Periodic.GetLocation())) + nextLaunch, err := job.Periodic.Next(launch.Launch.In(job.Periodic.GetLocation())) + if err != nil { + return fmt.Errorf("failed to parse periodic time: %v", err) + } // We skip force launching the job if there should be no next launch // (the zero case) or if the next launch time is in the future. If it is diff --git a/nomad/periodic.go b/nomad/periodic.go index 54b264b8adc5..c140cd8e0968 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -221,7 +221,10 @@ func (p *PeriodicDispatch) Add(job *structs.Job) error { // Add or update the job. p.tracked[tuple] = job - next := job.Periodic.Next(time.Now().In(job.Periodic.GetLocation())) + next, err := job.Periodic.Next(time.Now().In(job.Periodic.GetLocation())) + if err != nil { + return fmt.Errorf("[ERR] nomad.periodic unable to parse cron expression: %v", err) + } if tracked { if err := p.heap.Update(job, next); err != nil { return fmt.Errorf("failed to update job %q (%s) launch time: %v", job.ID, job.Namespace, err) @@ -344,7 +347,10 @@ func (p *PeriodicDispatch) run(ctx context.Context) { func (p *PeriodicDispatch) dispatch(job *structs.Job, launchTime time.Time) { p.l.Lock() - nextLaunch := job.Periodic.Next(launchTime) + nextLaunch, err := job.Periodic.Next(launchTime) + if err != nil { + p.logger.Printf("[ERR] nomad.periodic: failed to parse periodic job %v", err) + } if err := p.heap.Update(job, nextLaunch); err != nil { p.logger.Printf("[ERR] nomad.periodic: failed to update next launch of periodic job %q (%s): %v", job.ID, job.Namespace, err) } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 60d8f0d80191..91830e914574 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -2649,20 +2649,31 @@ func (p *PeriodicConfig) Canonicalize() { p.location = l } +func cronParseNext(e *cronexpr.Expression, fromTime time.Time) (t time.Time, err error) { + defer func() { + if recover() != nil { + t = time.Time{} + err = fmt.Errorf("Unable to parse cron expression from time") + } + }() + + return e.Next(fromTime), nil +} + // Next returns the closest time instant matching the spec that is after the // passed time. If no matching instance exists, the zero value of time.Time is // returned. The `time.Location` of the returned value matches that of the // passed time. -func (p *PeriodicConfig) Next(fromTime time.Time) time.Time { +func (p *PeriodicConfig) Next(fromTime time.Time) (time.Time, error) { switch p.SpecType { case PeriodicSpecCron: if e, err := cronexpr.Parse(p.Spec); err == nil { - return e.Next(fromTime) + return cronParseNext(e, fromTime) } case PeriodicSpecTest: split := strings.Split(p.Spec, ",") if len(split) == 1 && split[0] == "" { - return time.Time{} + return time.Time{}, nil } // Parse the times @@ -2670,7 +2681,7 @@ func (p *PeriodicConfig) Next(fromTime time.Time) time.Time { for i, s := range split { unix, err := strconv.Atoi(s) if err != nil { - return time.Time{} + return time.Time{}, nil } times[i] = time.Unix(int64(unix), 0) @@ -2679,12 +2690,12 @@ func (p *PeriodicConfig) Next(fromTime time.Time) time.Time { // Find the next match for _, next := range times { if fromTime.Before(next) { - return next + return next, nil } } } - return time.Time{} + return time.Time{}, nil } // GetLocation returns the location to use for determining the time zone to run From 67df44cd5e63d9704dfe03393d3d87e0014d2d01 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 26 Apr 2018 15:15:43 -0700 Subject: [PATCH 2/5] UX touchups --- nomad/fsm.go | 2 +- nomad/job_endpoint.go | 2 +- nomad/leader.go | 3 ++- nomad/periodic.go | 9 ++++----- nomad/structs/structs.go | 16 +++++++++++++--- nomad/structs/structs_test.go | 15 ++++++++++++--- 6 files changed, 33 insertions(+), 14 deletions(-) diff --git a/nomad/fsm.go b/nomad/fsm.go index 9ab5368d85b9..4ab6dd108e60 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -425,7 +425,7 @@ func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { // tracking it. if err := n.periodicDispatcher.Add(req.Job); err != nil { n.logger.Printf("[ERR] nomad.fsm: periodicDispatcher.Add failed: %v", err) - return err + return fmt.Errorf("failed adding job to periodic dispatcher: %v", err) } // Create a watch set diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 9923b995f79e..4acc885fffad 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -1219,7 +1219,7 @@ func (j *Job) Plan(args *structs.JobPlanRequest, reply *structs.JobPlanResponse) if args.Job.IsPeriodic() && args.Job.Periodic.Enabled { reply.NextPeriodicLaunch, err = args.Job.Periodic.Next(time.Now().In(args.Job.Periodic.GetLocation())) if err != nil { - return fmt.Errorf("Failed to parse cron time %v", err) + return fmt.Errorf("Failed to parse cron expression: %v", err) } } diff --git a/nomad/leader.go b/nomad/leader.go index ead72778ed5c..a30ea9f2dd21 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -402,7 +402,8 @@ func (s *Server) restorePeriodicDispatcher() error { // nextLaunch is the next launch that should occur. nextLaunch, err := job.Periodic.Next(launch.Launch.In(job.Periodic.GetLocation())) if err != nil { - return fmt.Errorf("failed to parse periodic time: %v", err) + s.logger.Printf("[ERR] nomad.periodic: failed to determine next periodic launch for job %s: %v", job.NamespacedID(), err) + continue } // We skip force launching the job if there should be no next launch diff --git a/nomad/periodic.go b/nomad/periodic.go index c140cd8e0968..05f953189c6c 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -223,7 +223,7 @@ func (p *PeriodicDispatch) Add(job *structs.Job) error { p.tracked[tuple] = job next, err := job.Periodic.Next(time.Now().In(job.Periodic.GetLocation())) if err != nil { - return fmt.Errorf("[ERR] nomad.periodic unable to parse cron expression: %v", err) + return fmt.Errorf("failed adding job %s: %v", job.NamespacedID(), err) } if tracked { if err := p.heap.Update(job, next); err != nil { @@ -349,10 +349,9 @@ func (p *PeriodicDispatch) dispatch(job *structs.Job, launchTime time.Time) { nextLaunch, err := job.Periodic.Next(launchTime) if err != nil { - p.logger.Printf("[ERR] nomad.periodic: failed to parse periodic job %v", err) - } - if err := p.heap.Update(job, nextLaunch); err != nil { - p.logger.Printf("[ERR] nomad.periodic: failed to update next launch of periodic job %q (%s): %v", job.ID, job.Namespace, err) + p.logger.Printf("[ERR] nomad.periodic: failed to parse next periodic launch for job %s: %v", job.NamespacedID(), err) + } else if err := p.heap.Update(job, nextLaunch); err != nil { + p.logger.Printf("[ERR] nomad.periodic: failed to update next launch of periodic job %s: %v", job.NamespacedID(), err) } // If the job prohibits overlapping and there are running children, we skip diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 91830e914574..a9df935ceefd 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1997,6 +1997,14 @@ type Job struct { JobModifyIndex uint64 } +// NamespacedID returns the namespaced id useful for logging +func (j *Job) NamespacedID() *NamespacedID { + return &NamespacedID{ + ID: j.ID, + Namespace: j.Namespace, + } +} + // Canonicalize is used to canonicalize fields in the Job. This should be called // when registering a Job. A set of warnings are returned if the job was changed // in anyway that the user should be made aware of. @@ -2649,11 +2657,13 @@ func (p *PeriodicConfig) Canonicalize() { p.location = l } -func cronParseNext(e *cronexpr.Expression, fromTime time.Time) (t time.Time, err error) { +// cronParseNext is a helper that parses the next time for the given expression +// but captures any panic that may occur in the underlying library. +func cronParseNext(e *cronexpr.Expression, fromTime time.Time, spec string) (t time.Time, err error) { defer func() { if recover() != nil { t = time.Time{} - err = fmt.Errorf("Unable to parse cron expression from time") + err = fmt.Errorf("failed parsing cron expression: %q", spec) } }() @@ -2668,7 +2678,7 @@ func (p *PeriodicConfig) Next(fromTime time.Time) (time.Time, error) { switch p.SpecType { case PeriodicSpecCron: if e, err := cronexpr.Parse(p.Spec); err == nil { - return cronParseNext(e, fromTime) + return cronParseNext(e, fromTime, p.Spec) } case PeriodicSpecTest: split := strings.Split(p.Spec, ",") diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 0720b0dfbf3b..49f1ad1991a2 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -2015,7 +2015,10 @@ func TestPeriodicConfig_NextCron(t *testing.T) { for i, spec := range specs { p := &PeriodicConfig{Enabled: true, SpecType: PeriodicSpecCron, Spec: spec} p.Canonicalize() - n := p.Next(from) + n, err := p.Next(from) + if err != nil { + t.Fatalf("Next returned error: %v", err) + } if expected[i] != n { t.Fatalf("Next(%v) returned %v; want %v", from, n, expected[i]) } @@ -2050,8 +2053,14 @@ func TestPeriodicConfig_DST(t *testing.T) { e1 := time.Date(2017, time.March, 11, 10, 0, 0, 0, time.UTC) e2 := time.Date(2017, time.March, 12, 9, 0, 0, 0, time.UTC) - n1 := p.Next(t1).UTC() - n2 := p.Next(t2).UTC() + n1, err1 := p.Next(t1) + n2, err2 := p.Next(t2) + if err1 != nil || err2 != nil { + t.Fatalf("bad: %v %v", err1, err2) + } + + n1 = n1.UTC() + n2 = n2.UTC() if !reflect.DeepEqual(e1, n1) { t.Fatalf("Got %v; want %v", n1, e1) From 15623e52cd6a1e880b5acd58359772c9e04745a8 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 26 Apr 2018 15:46:22 -0700 Subject: [PATCH 3/5] Fix command line --- api/jobs.go | 7 ++++--- command/job_run.go | 10 +++++++--- command/job_status.go | 10 ++++++---- nomad/structs/structs.go | 6 +++--- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/api/jobs.go b/api/jobs.go index 7c2b7c72310f..b56b9c8646bb 100644 --- a/api/jobs.go +++ b/api/jobs.go @@ -9,6 +9,7 @@ import ( "github.com/gorhill/cronexpr" "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/nomad/structs" ) const ( @@ -537,14 +538,14 @@ func (p *PeriodicConfig) Canonicalize() { // passed time. If no matching instance exists, the zero value of time.Time is // returned. The `time.Location` of the returned value matches that of the // passed time. -func (p *PeriodicConfig) Next(fromTime time.Time) time.Time { +func (p *PeriodicConfig) Next(fromTime time.Time) (time.Time, error) { if *p.SpecType == PeriodicSpecCron { if e, err := cronexpr.Parse(*p.Spec); err == nil { - return e.Next(fromTime) + return structs.CronParseNext(e, fromTime, *p.Spec) } } - return time.Time{} + return time.Time{}, nil } func (p *PeriodicConfig) GetLocation() (*time.Location, error) { diff --git a/command/job_run.go b/command/job_run.go index 7f17decd5182..cc6c5b924748 100644 --- a/command/job_run.go +++ b/command/job_run.go @@ -247,9 +247,13 @@ func (c *JobRunCommand) Run(args []string) int { loc, err := job.Periodic.GetLocation() if err == nil { now := time.Now().In(loc) - next := job.Periodic.Next(now) - c.Ui.Output(fmt.Sprintf("Approximate next launch time: %s (%s from now)", - formatTime(next), formatTimeDifference(now, next, time.Second))) + next, err := job.Periodic.Next(now) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error determining next launch time: %v", err)) + } else { + c.Ui.Output(fmt.Sprintf("Approximate next launch time: %s (%s from now)", + formatTime(next), formatTimeDifference(now, next, time.Second))) + } } } else if !paramjob { c.Ui.Output("Evaluation ID: " + evalID) diff --git a/command/job_status.go b/command/job_status.go index af7d18af9ca5..9e7f9db8bb88 100644 --- a/command/job_status.go +++ b/command/job_status.go @@ -185,10 +185,12 @@ func (c *JobStatusCommand) Run(args []string) int { location, err := job.Periodic.GetLocation() if err == nil { now := time.Now().In(location) - next := job.Periodic.Next(now) - basic = append(basic, fmt.Sprintf("Next Periodic Launch|%s", - fmt.Sprintf("%s (%s from now)", - formatTime(next), formatTimeDifference(now, next, time.Second)))) + next, err := job.Periodic.Next(now) + if err == nil { + basic = append(basic, fmt.Sprintf("Next Periodic Launch|%s", + fmt.Sprintf("%s (%s from now)", + formatTime(next), formatTimeDifference(now, next, time.Second)))) + } } } } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index a9df935ceefd..1ea012dea092 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -2657,9 +2657,9 @@ func (p *PeriodicConfig) Canonicalize() { p.location = l } -// cronParseNext is a helper that parses the next time for the given expression +// CronParseNext is a helper that parses the next time for the given expression // but captures any panic that may occur in the underlying library. -func cronParseNext(e *cronexpr.Expression, fromTime time.Time, spec string) (t time.Time, err error) { +func CronParseNext(e *cronexpr.Expression, fromTime time.Time, spec string) (t time.Time, err error) { defer func() { if recover() != nil { t = time.Time{} @@ -2678,7 +2678,7 @@ func (p *PeriodicConfig) Next(fromTime time.Time) (time.Time, error) { switch p.SpecType { case PeriodicSpecCron: if e, err := cronexpr.Parse(p.Spec); err == nil { - return cronParseNext(e, fromTime, p.Spec) + return CronParseNext(e, fromTime, p.Spec) } case PeriodicSpecTest: split := strings.Split(p.Spec, ",") From 7ef3ca0a690e9c0c86b51577f3d179409c833fbb Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Thu, 26 Apr 2018 18:36:23 -0400 Subject: [PATCH 4/5] add unit tests for panic cron parsing bug add comments for cron parsing wrapper --- nomad/structs/structs_test.go | 59 ++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 49f1ad1991a2..b8e63a023a7d 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -2009,18 +2009,44 @@ func TestPeriodicConfig_ValidCron(t *testing.T) { } func TestPeriodicConfig_NextCron(t *testing.T) { + require := require.New(t) + + type testExpectation struct { + Time time.Time + HasError bool + ErrorMsg string + } + from := time.Date(2009, time.November, 10, 23, 22, 30, 0, time.UTC) - specs := []string{"0 0 29 2 * 1980", "*/5 * * * *"} - expected := []time.Time{{}, time.Date(2009, time.November, 10, 23, 25, 0, 0, time.UTC)} + specs := []string{"0 0 29 2 * 1980", + "*/5 * * * *", + "1 15-0 * * 1-5"} + expected := []*testExpectation{ + { + Time: time.Time{}, + HasError: false, + }, + { + Time: time.Date(2009, time.November, 10, 23, 25, 0, 0, time.UTC), + HasError: false, + }, + { + Time: time.Time{}, + HasError: true, + ErrorMsg: "failed parsing cron expression", + }, + } + for i, spec := range specs { p := &PeriodicConfig{Enabled: true, SpecType: PeriodicSpecCron, Spec: spec} p.Canonicalize() n, err := p.Next(from) + nextExpected := expected[i] + + require.Equal(nextExpected.Time, n) + require.Equal(err != nil, nextExpected.HasError) if err != nil { - t.Fatalf("Next returned error: %v", err) - } - if expected[i] != n { - t.Fatalf("Next(%v) returned %v; want %v", from, n, expected[i]) + require.True(strings.Contains(err.Error(), nextExpected.ErrorMsg)) } } } @@ -2037,6 +2063,8 @@ func TestPeriodicConfig_ValidTimeZone(t *testing.T) { } func TestPeriodicConfig_DST(t *testing.T) { + require := require.New(t) + // On Sun, Mar 12, 2:00 am 2017: +1 hour UTC p := &PeriodicConfig{ Enabled: true, @@ -2053,21 +2081,14 @@ func TestPeriodicConfig_DST(t *testing.T) { e1 := time.Date(2017, time.March, 11, 10, 0, 0, 0, time.UTC) e2 := time.Date(2017, time.March, 12, 9, 0, 0, 0, time.UTC) - n1, err1 := p.Next(t1) - n2, err2 := p.Next(t2) - if err1 != nil || err2 != nil { - t.Fatalf("bad: %v %v", err1, err2) - } + n1, err := p.Next(t1) + require.Nil(err) - n1 = n1.UTC() - n2 = n2.UTC() + n2, err := p.Next(t2) + require.Nil(err) - if !reflect.DeepEqual(e1, n1) { - t.Fatalf("Got %v; want %v", n1, e1) - } - if !reflect.DeepEqual(e2, n2) { - t.Fatalf("Got %v; want %v", n1, e1) - } + require.Equal(e1, n1.UTC()) + require.Equal(e2, n2.UTC()) } func TestRestartPolicy_Validate(t *testing.T) { From c62bb84c540e5b8478546dc8db64c4a36021fd1d Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 26 Apr 2018 15:51:47 -0700 Subject: [PATCH 5/5] Allow nomad to restore bad periodic job --- nomad/leader.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nomad/leader.go b/nomad/leader.go index a30ea9f2dd21..a1a65b05f251 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -379,7 +379,8 @@ func (s *Server) restorePeriodicDispatcher() error { } if err := s.periodicDispatcher.Add(job); err != nil { - return err + s.logger.Printf("[ERR] nomad.periodic: %v", err) + continue } // We do not need to force run the job since it isn't active.