Add "alertmanager_provider_alerts" metric #1589

Closed · wants to merge 1 commit
2 changes: 1 addition & 1 deletion cmd/alertmanager/main.go
@@ -317,7 +317,7 @@ func run() int {
go peer.Settle(ctx, *gossipInterval*10)
}

alerts, err := mem.NewAlerts(context.Background(), marker, *alertGCInterval, logger)
alerts, err := mem.NewAlerts(context.Background(), marker, *alertGCInterval, logger, prometheus.DefaultRegisterer)
if err != nil {
level.Error(logger).Log("err", err)
return 1
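With the alert provider now registered against prometheus.DefaultRegisterer, the gauge defined later in provider/mem/mem.go should show up on Alertmanager's /metrics endpoint roughly like this (a sketch; the value is illustrative, and only the firing state is registered in this change):

# HELP alertmanager_provider_alerts How many alerts for provider by state.
# TYPE alertmanager_provider_alerts gauge
alertmanager_provider_alerts{state="firing"} 2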
5 changes: 3 additions & 2 deletions dispatch/dispatch_test.go
@@ -363,8 +363,9 @@ route:

logger := log.NewNopLogger()
route := NewRoute(conf.Route, nil)
marker := types.NewMarker(prometheus.NewRegistry())
alerts, err := mem.NewAlerts(context.Background(), marker, time.Hour, logger)
r := prometheus.NewRegistry()
marker := types.NewMarker(r)
alerts, err := mem.NewAlerts(context.Background(), marker, time.Hour, logger, r)
if err != nil {
t.Fatal(err)
}
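The tests hand a fresh prometheus.NewRegistry() to both NewMarker and NewAlerts instead of the default registerer. A minimal sketch of why that isolation matters, assuming only the client_golang/prometheus import and a hypothetical newGauge helper (not code from this PR): registering the same fully-qualified metric twice on one registerer makes MustRegister panic.

r := prometheus.NewRegistry()
newGauge := func() prometheus.GaugeFunc {
	// Same name and const labels as the gauge added in provider/mem/mem.go.
	return prometheus.NewGaugeFunc(prometheus.GaugeOpts{
		Name:        "alertmanager_provider_alerts",
		Help:        "How many alerts for provider by state.",
		ConstLabels: prometheus.Labels{"state": "firing"},
	}, func() float64 { return 0 })
}
r.MustRegister(newGauge()) // first registration succeeds
r.MustRegister(newGauge()) // panics with an AlreadyRegisteredError, as a second NewAlerts on a shared registerer would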
34 changes: 33 additions & 1 deletion provider/mem/mem.go
@@ -15,6 +15,7 @@ package mem

import (
"context"
"github.com/prometheus/client_golang/prometheus"
"sync"
"time"

Expand Down Expand Up @@ -47,7 +48,7 @@ type listeningAlerts struct {
}

// NewAlerts returns a new alert provider.
func NewAlerts(ctx context.Context, m types.Marker, intervalGC time.Duration, l log.Logger) (*Alerts, error) {
func NewAlerts(ctx context.Context, m types.Marker, intervalGC time.Duration, l log.Logger, r prometheus.Registerer) (*Alerts, error) {
ctx, cancel := context.WithCancel(ctx)
a := &Alerts{
alerts: store.NewAlerts(),
@@ -76,11 +77,31 @@ func NewAlerts(ctx context.Context, m types.Marker, intervalGC time.Duration, l
}
a.mtx.Unlock()
})
a.registerMetrics(r)
go a.alerts.Run(ctx, intervalGC)

return a, nil
}

func (a *Alerts) registerMetrics(r prometheus.Registerer) {
newAlertMetricByState := func(st model.AlertStatus) prometheus.GaugeFunc {
return prometheus.NewGaugeFunc(
prometheus.GaugeOpts{
Name: "alertmanager_provider_alerts",
Help: "How many alerts for provider by state.",
ConstLabels: prometheus.Labels{"state": string(st)},
},
func() float64 {
return float64(a.countPending(st))
},
)
}

alertsFiring := newAlertMetricByState(model.AlertFiring)

r.MustRegister(alertsFiring)
}

// Close the alert provider.
func (a *Alerts) Close() {
if a.cancel != nil {
@@ -141,6 +162,17 @@ func (a *Alerts) GetPending() provider.AlertIterator {
return provider.NewAlertIterator(ch, done, nil)
}

func (a *Alerts) countPending(status model.AlertStatus) int {
count := 0

for _, a := range a.alerts.List() {
if a.Status() == status {
count++
}
}
return count
}

// Get returns the alert for a given fingerprint.
func (a *Alerts) Get(fp model.Fingerprint) (*types.Alert, error) {
return a.alerts.Get(fp)
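registerMetrics uses prometheus.NewGaugeFunc, so countPending runs whenever the registry is gathered rather than on every Put. Below is a standalone sketch of that behaviour using the same metric name and labels as above; the firing variable is a stand-in for the provider's alert store and is not part of this PR:

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	firing := 0 // stand-in for what Alerts.countPending(model.AlertFiring) would return

	gauge := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
		Name:        "alertmanager_provider_alerts",
		Help:        "How many alerts for provider by state.",
		ConstLabels: prometheus.Labels{"state": "firing"},
	}, func() float64 { return float64(firing) })

	r := prometheus.NewRegistry()
	r.MustRegister(gauge)

	firing = 2 // no Set() anywhere; the callback is evaluated at gather/scrape time
	mfs, err := r.Gather()
	if err != nil {
		panic(err)
	}
	fmt.Println(mfs[0].GetMetric()[0].GetGauge().GetValue()) // prints 2
}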
65 changes: 55 additions & 10 deletions provider/mem/mem_test.go
@@ -84,8 +84,9 @@ func init() {
// If the channel of a listener is at its limit, `alerts.Lock` is blocked, whereby
// a listener cannot unsubscribe as the lock is held by `alerts.Lock`.
func TestAlertsSubscribePutStarvation(t *testing.T) {
marker := types.NewMarker(prometheus.NewRegistry())
alerts, err := NewAlerts(context.Background(), marker, 30*time.Minute, log.NewNopLogger())
r := prometheus.NewRegistry()
marker := types.NewMarker(r)
alerts, err := NewAlerts(context.Background(), marker, 30*time.Minute, log.NewNopLogger(), r)
if err != nil {
t.Fatal(err)
}
@@ -135,8 +136,9 @@ func TestAlertsSubscribePutStarvation(t *testing.T) {
}

func TestAlertsPut(t *testing.T) {
marker := types.NewMarker(prometheus.NewRegistry())
alerts, err := NewAlerts(context.Background(), marker, 30*time.Minute, log.NewNopLogger())
r := prometheus.NewRegistry()
marker := types.NewMarker(r)
alerts, err := NewAlerts(context.Background(), marker, 30*time.Minute, log.NewNopLogger(), r)
if err != nil {
t.Fatal(err)
}
@@ -160,11 +162,12 @@ func TestAlertsPut(t *testing.T) {
}

func TestAlertsSubscribe(t *testing.T) {
marker := types.NewMarker(prometheus.NewRegistry())
r := prometheus.NewRegistry()
marker := types.NewMarker(r)

ctx, cancel := context.WithCancel(context.Background())
defer cancel()
alerts, err := NewAlerts(ctx, marker, 30*time.Minute, log.NewNopLogger())
alerts, err := NewAlerts(ctx, marker, 30*time.Minute, log.NewNopLogger(), r)
if err != nil {
t.Fatal(err)
}
@@ -240,8 +243,9 @@ func TestAlertsSubscribe(t *testing.T) {
}

func TestAlertsGetPending(t *testing.T) {
marker := types.NewMarker(prometheus.NewRegistry())
alerts, err := NewAlerts(context.Background(), marker, 30*time.Minute, log.NewNopLogger())
r := prometheus.NewRegistry()
marker := types.NewMarker(r)
alerts, err := NewAlerts(context.Background(), marker, 30*time.Minute, log.NewNopLogger(), r)
if err != nil {
t.Fatal(err)
}
@@ -282,9 +286,50 @@ func TestAlertsGetPending(t *testing.T) {
}
}

func TestAlertsCountPending(t *testing.T) {
r := prometheus.NewRegistry()
marker := types.NewMarker(r)
alerts, err := NewAlerts(context.Background(), marker, 30*time.Minute, log.NewNopLogger(), r)
if err != nil {
t.Fatal(err)
}

checkCount := func(status model.AlertStatus, expected int) {
count := alerts.countPending(status)
if !(count == expected) {
t.Errorf("Unexpected alert count %d instead of %d for status '%s'", count, expected, status)
}
}

tResolved := time.Now().Add(-time.Second)
tFiring := time.Now().Add(time.Second)

a1 := types.Alert(*alert1)
a2 := types.Alert(*alert2)

a1.EndsAt = tFiring
a2.EndsAt = tFiring

checkCount(model.AlertResolved, 0)
checkCount(model.AlertFiring, 0)

if err := alerts.Put(&a1, &a2); err != nil {
t.Fatalf("Insert failed: %s", err)
}

checkCount(model.AlertResolved, 0)
checkCount(model.AlertFiring, 2)

a1.EndsAt = tResolved

checkCount(model.AlertResolved, 1)
checkCount(model.AlertFiring, 1)
}

func TestAlertsGC(t *testing.T) {
marker := types.NewMarker(prometheus.NewRegistry())
alerts, err := NewAlerts(context.Background(), marker, 200*time.Millisecond, log.NewNopLogger())
r := prometheus.NewRegistry()
marker := types.NewMarker(r)
alerts, err := NewAlerts(context.Background(), marker, 200*time.Millisecond, log.NewNopLogger(), r)
if err != nil {
t.Fatal(err)
}
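TestAlertsCountPending exercises countPending directly and does not assert the registered gauge itself. A hypothetical follow-up check, not part of this PR, could compare the registry output with client_golang's testutil package; it assumes additional imports of strings and github.com/prometheus/client_golang/prometheus/testutil, and would be inserted right after the Put call while both alerts are still firing:

expected := `
# HELP alertmanager_provider_alerts How many alerts for provider by state.
# TYPE alertmanager_provider_alerts gauge
alertmanager_provider_alerts{state="firing"} 2
`
// r is the test's registry; compare only the new metric family.
if err := testutil.GatherAndCompare(r, strings.NewReader(expected), "alertmanager_provider_alerts"); err != nil {
	t.Fatal(err)
}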