Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bulkio: Count corrupt schedules. #54041

Merged
merged 1 commit into from
Sep 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions pkg/jobs/job_scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ const recheckRunningAfter = 1 * time.Minute
// loopStats holds per-iteration tallies for the scheduler loop. The values
// are published to SchedulerMetrics gauges by updateMetrics.
type loopStats struct {
	// rescheduleWait is published to the RescheduleWait gauge (schedules
	// rescheduled due to WAIT policy).
	rescheduleWait int64
	// rescheduleSkip is published to the RescheduleSkip gauge.
	// NOTE(review): presumably the SKIP-policy counterpart of
	// rescheduleWait -- confirm against processSchedule.
	rescheduleSkip int64
	// started is presumably the number of schedules started in this
	// iteration -- TODO confirm; the update site is not visible here.
	started int64
	// readyToRun is presumably the number of schedules found ready to
	// execute -- TODO confirm; the update site is not visible here.
	readyToRun int64
	// jobsRunning is published to the NumRunning gauge.
	jobsRunning int64
	// malformed counts rows that could not be unmarshaled into a schedule;
	// it is published to the NumMalformedSchedules gauge.
	malformed int64
}

func (s *loopStats) updateMetrics(m *SchedulerMetrics) {
Expand All @@ -123,6 +124,7 @@ func (s *loopStats) updateMetrics(m *SchedulerMetrics) {
m.NumRunning.Update(s.jobsRunning)
m.RescheduleSkip.Update(s.rescheduleSkip)
m.RescheduleWait.Update(s.rescheduleWait)
m.NumMalformedSchedules.Update(s.malformed)
}

func (s *jobScheduler) processSchedule(
Expand Down Expand Up @@ -290,7 +292,7 @@ func (s *jobScheduler) executeSchedules(
for _, row := range rows {
schedule, numRunning, err := s.unmarshalScheduledJob(row, cols)
if err != nil {
s.metrics.NumBadSchedules.Inc(1)
stats.malformed++
log.Errorf(ctx, "error parsing schedule: %+v", row)
continue
}
Expand All @@ -303,7 +305,7 @@ func (s *jobScheduler) executeSchedules(
}

// Failed to process schedule.
s.metrics.NumBadSchedules.Inc(1)
s.metrics.NumErrSchedules.Inc(1)
log.Errorf(ctx,
"error processing schedule %d: %+v", schedule.ScheduleID(), processErr)

Expand Down
18 changes: 14 additions & 4 deletions pkg/jobs/schedule_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@ type SchedulerMetrics struct {
// Number of schedules rescheduled due to WAIT policy.
RescheduleWait *metric.Gauge
// Number of schedules that could not be processed due to an error.
NumBadSchedules *metric.Counter
NumErrSchedules *metric.Gauge
// Number of schedules that are malformed: that is, the schedules
// we cannot parse, or even attempt to execute.
NumMalformedSchedules *metric.Gauge
}

// MakeSchedulerMetrics returns metrics for scheduled job daemon.
Expand Down Expand Up @@ -83,9 +86,16 @@ func MakeSchedulerMetrics() SchedulerMetrics {
Unit: metric.Unit_COUNT,
}),

NumBadSchedules: metric.NewCounter(metric.Metadata{
Name: "schedules.corrupt",
Help: "Number of corrupt/bad schedules",
NumErrSchedules: metric.NewGauge(metric.Metadata{
Name: "schedules.error",
Help: "Number of schedules which did not execute successfully",
Measurement: "Schedules",
Unit: metric.Unit_COUNT,
}),

NumMalformedSchedules: metric.NewGauge(metric.Metadata{
Name: "schedules.malformed",
Help: "Number of malformed schedules",
Measurement: "Schedules",
Unit: metric.Unit_COUNT,
}),
Expand Down
3 changes: 2 additions & 1 deletion pkg/ts/catalog/chart_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -2225,7 +2225,8 @@ var charts = []sectionDescription{
{
Title: "Total",
Metrics: []string{
"schedules.corrupt",
"schedules.malformed",
"schedules.error",
"schedules.total.started",
"schedules.total.succeeded",
"schedules.total.failed",
Expand Down