Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use health labels #3122

Merged
merged 2 commits into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 15 additions & 15 deletions api/health/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ import (
)

const (
// CheckLabel is the label used to differentiate between health checks.
CheckLabel = "check"
// TagLabel is the label used to differentiate between health check tags.
TagLabel = "tag"
// AllTag is automatically added to every registered check.
AllTag = "all"
// ApplicationTag checks will act as if they specified every tag that has
Expand Down Expand Up @@ -62,23 +66,19 @@ type health struct {
}

func New(log logging.Logger, registerer prometheus.Registerer) (Health, error) {
readinessWorker, err := newWorker(log, "readiness", registerer)
if err != nil {
return nil, err
}

healthWorker, err := newWorker(log, "health", registerer)
if err != nil {
return nil, err
}

livenessWorker, err := newWorker(log, "liveness", registerer)
failingChecks := prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "checks_failing",
Help: "number of currently failing health checks",
},
[]string{CheckLabel, TagLabel},
)
return &health{
log: log,
readiness: readinessWorker,
health: healthWorker,
liveness: livenessWorker,
}, err
readiness: newWorker(log, "readiness", failingChecks),
health: newWorker(log, "health", failingChecks),
liveness: newWorker(log, "liveness", failingChecks),
}, registerer.Register(failingChecks)
}

func (h *health) RegisterReadinessCheck(name string, checker Checker, tags ...string) error {
Expand Down
27 changes: 0 additions & 27 deletions api/health/metrics.go

This file was deleted.

61 changes: 38 additions & 23 deletions api/health/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ var (
)

type worker struct {
log logging.Logger
namespace string
metrics *metrics
checksLock sync.RWMutex
checks map[string]*taggedChecker
log logging.Logger
name string
failingChecks *prometheus.GaugeVec
checksLock sync.RWMutex
checks map[string]*taggedChecker

resultsLock sync.RWMutex
results map[string]Result
Expand All @@ -53,19 +53,25 @@ type taggedChecker struct {

func newWorker(
log logging.Logger,
namespace string,
registerer prometheus.Registerer,
) (*worker, error) {
metrics, err := newMetrics(namespace, registerer)
name string,
failingChecks *prometheus.GaugeVec,
) *worker {
// Initialize this worker's failing-check gauge to 0 for the AllTag and ApplicationTag labels
for _, tag := range []string{AllTag, ApplicationTag} {
failingChecks.With(prometheus.Labels{
CheckLabel: name,
TagLabel: tag,
}).Set(0)
}
return &worker{
log: log,
namespace: namespace,
metrics: metrics,
checks: make(map[string]*taggedChecker),
results: make(map[string]Result),
closer: make(chan struct{}),
tags: make(map[string]set.Set[string]),
}, err
log: log,
name: name,
failingChecks: failingChecks,
checks: make(map[string]*taggedChecker),
results: make(map[string]Result),
closer: make(chan struct{}),
tags: make(map[string]set.Set[string]),
}
}

func (w *worker) RegisterCheck(name string, check Checker, tags ...string) error {
Expand Down Expand Up @@ -107,7 +113,7 @@ func (w *worker) RegisterCheck(name string, check Checker, tags ...string) error

// Whenever a new check is added - it is failing
w.log.Info("registered new check and initialized its state to failing",
zap.String("namespace", w.namespace),
zap.String("name", w.name),
zap.String("name", name),
zap.Strings("tags", tags),
)
Expand Down Expand Up @@ -244,7 +250,7 @@ func (w *worker) runCheck(ctx context.Context, wg *sync.WaitGroup, name string,

if prevResult.Error == nil {
w.log.Warn("check started failing",
zap.String("namespace", w.namespace),
zap.String("name", w.name),
zap.String("name", name),
zap.Strings("tags", check.tags),
zap.Error(err),
Expand All @@ -253,7 +259,7 @@ func (w *worker) runCheck(ctx context.Context, wg *sync.WaitGroup, name string,
}
} else if prevResult.Error != nil {
w.log.Info("check started passing",
zap.String("namespace", w.namespace),
zap.String("name", w.name),
zap.String("name", name),
zap.Strings("tags", check.tags),
)
Expand All @@ -271,7 +277,10 @@ func (w *worker) updateMetrics(tc *taggedChecker, healthy bool, register bool) {
if tc.isApplicationCheck {
// Note: [w.tags] will include AllTag.
for tag := range w.tags {
gauge := w.metrics.failingChecks.WithLabelValues(tag)
gauge := w.failingChecks.With(prometheus.Labels{
CheckLabel: w.name,
TagLabel: tag,
})
if healthy {
gauge.Dec()
} else {
Expand All @@ -285,7 +294,10 @@ func (w *worker) updateMetrics(tc *taggedChecker, healthy bool, register bool) {
}
} else {
for _, tag := range tc.tags {
gauge := w.metrics.failingChecks.WithLabelValues(tag)
gauge := w.failingChecks.With(prometheus.Labels{
CheckLabel: w.name,
TagLabel: tag,
})
if healthy {
gauge.Dec()
} else {
Expand All @@ -297,7 +309,10 @@ func (w *worker) updateMetrics(tc *taggedChecker, healthy bool, register bool) {
}
}
}
gauge := w.metrics.failingChecks.WithLabelValues(AllTag)
gauge := w.failingChecks.With(prometheus.Labels{
CheckLabel: w.name,
TagLabel: AllTag,
})
if healthy {
gauge.Dec()
} else {
Expand Down
Loading