Skip to content

Commit

Permalink
Use health labels (#3122)
Browse files Browse the repository at this point in the history
  • Loading branch information
StephenButtolph committed Jun 17, 2024
1 parent 7455c99 commit 576b392
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 65 deletions.
30 changes: 15 additions & 15 deletions api/health/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ import (
)

const (
// CheckLabel is the label used to differentiate between health checks.
CheckLabel = "check"
// TagLabel is the label used to differentiate between health check tags.
TagLabel = "tag"
// AllTag is automatically added to every registered check.
AllTag = "all"
// ApplicationTag checks will act as if they specified every tag that has
Expand Down Expand Up @@ -62,23 +66,19 @@ type health struct {
}

func New(log logging.Logger, registerer prometheus.Registerer) (Health, error) {
readinessWorker, err := newWorker(log, "readiness", registerer)
if err != nil {
return nil, err
}

healthWorker, err := newWorker(log, "health", registerer)
if err != nil {
return nil, err
}

livenessWorker, err := newWorker(log, "liveness", registerer)
failingChecks := prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "checks_failing",
Help: "number of currently failing health checks",
},
[]string{CheckLabel, TagLabel},
)
return &health{
log: log,
readiness: readinessWorker,
health: healthWorker,
liveness: livenessWorker,
}, err
readiness: newWorker(log, "readiness", failingChecks),
health: newWorker(log, "health", failingChecks),
liveness: newWorker(log, "liveness", failingChecks),
}, registerer.Register(failingChecks)
}

func (h *health) RegisterReadinessCheck(name string, checker Checker, tags ...string) error {
Expand Down
27 changes: 0 additions & 27 deletions api/health/metrics.go

This file was deleted.

61 changes: 38 additions & 23 deletions api/health/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ var (
)

type worker struct {
log logging.Logger
namespace string
metrics *metrics
checksLock sync.RWMutex
checks map[string]*taggedChecker
log logging.Logger
name string
failingChecks *prometheus.GaugeVec
checksLock sync.RWMutex
checks map[string]*taggedChecker

resultsLock sync.RWMutex
results map[string]Result
Expand All @@ -53,19 +53,25 @@ type taggedChecker struct {

func newWorker(
log logging.Logger,
namespace string,
registerer prometheus.Registerer,
) (*worker, error) {
metrics, err := newMetrics(namespace, registerer)
name string,
failingChecks *prometheus.GaugeVec,
) *worker {
// Initialize the number of failing checks to 0 for all checks
for _, tag := range []string{AllTag, ApplicationTag} {
failingChecks.With(prometheus.Labels{
CheckLabel: name,
TagLabel: tag,
}).Set(0)
}
return &worker{
log: log,
namespace: namespace,
metrics: metrics,
checks: make(map[string]*taggedChecker),
results: make(map[string]Result),
closer: make(chan struct{}),
tags: make(map[string]set.Set[string]),
}, err
log: log,
name: name,
failingChecks: failingChecks,
checks: make(map[string]*taggedChecker),
results: make(map[string]Result),
closer: make(chan struct{}),
tags: make(map[string]set.Set[string]),
}
}

func (w *worker) RegisterCheck(name string, check Checker, tags ...string) error {
Expand Down Expand Up @@ -107,7 +113,7 @@ func (w *worker) RegisterCheck(name string, check Checker, tags ...string) error

// Whenever a new check is added - it is failing
w.log.Info("registered new check and initialized its state to failing",
zap.String("namespace", w.namespace),
zap.String("name", w.name),
zap.String("name", name),
zap.Strings("tags", tags),
)
Expand Down Expand Up @@ -244,7 +250,7 @@ func (w *worker) runCheck(ctx context.Context, wg *sync.WaitGroup, name string,

if prevResult.Error == nil {
w.log.Warn("check started failing",
zap.String("namespace", w.namespace),
zap.String("name", w.name),
zap.String("name", name),
zap.Strings("tags", check.tags),
zap.Error(err),
Expand All @@ -253,7 +259,7 @@ func (w *worker) runCheck(ctx context.Context, wg *sync.WaitGroup, name string,
}
} else if prevResult.Error != nil {
w.log.Info("check started passing",
zap.String("namespace", w.namespace),
zap.String("name", w.name),
zap.String("name", name),
zap.Strings("tags", check.tags),
)
Expand All @@ -271,7 +277,10 @@ func (w *worker) updateMetrics(tc *taggedChecker, healthy bool, register bool) {
if tc.isApplicationCheck {
// Note: [w.tags] will include AllTag.
for tag := range w.tags {
gauge := w.metrics.failingChecks.WithLabelValues(tag)
gauge := w.failingChecks.With(prometheus.Labels{
CheckLabel: w.name,
TagLabel: tag,
})
if healthy {
gauge.Dec()
} else {
Expand All @@ -285,7 +294,10 @@ func (w *worker) updateMetrics(tc *taggedChecker, healthy bool, register bool) {
}
} else {
for _, tag := range tc.tags {
gauge := w.metrics.failingChecks.WithLabelValues(tag)
gauge := w.failingChecks.With(prometheus.Labels{
CheckLabel: w.name,
TagLabel: tag,
})
if healthy {
gauge.Dec()
} else {
Expand All @@ -297,7 +309,10 @@ func (w *worker) updateMetrics(tc *taggedChecker, healthy bool, register bool) {
}
}
}
gauge := w.metrics.failingChecks.WithLabelValues(AllTag)
gauge := w.failingChecks.With(prometheus.Labels{
CheckLabel: w.name,
TagLabel: AllTag,
})
if healthy {
gauge.Dec()
} else {
Expand Down

0 comments on commit 576b392

Please sign in to comment.