diff --git a/api/v1/api_test.go b/api/v1/api_test.go
index a2991ab497..d1a66c71a8 100644
--- a/api/v1/api_test.go
+++ b/api/v1/api_test.go
@@ -58,9 +58,8 @@ func newFakeAlerts(alerts []*types.Alert, withErr bool) *fakeAlerts {
 
 func (f *fakeAlerts) Subscribe() provider.AlertIterator           { return nil }
 func (f *fakeAlerts) Get(model.Fingerprint) (*types.Alert, error) { return nil, nil }
-func (f *fakeAlerts) Put(alerts ...*types.Alert) error {
-	return f.err
-}
+func (f *fakeAlerts) Put(alerts ...*types.Alert) error            { return f.err }
+func (f *fakeAlerts) CountPending(model.AlertStatus) int          { return -1 }
 func (f *fakeAlerts) GetPending() provider.AlertIterator {
 	ch := make(chan *types.Alert)
 	done := make(chan struct{})
diff --git a/cmd/alertmanager/main.go b/cmd/alertmanager/main.go
index 11fe7c9415..6980c43729 100644
--- a/cmd/alertmanager/main.go
+++ b/cmd/alertmanager/main.go
@@ -34,6 +34,7 @@ import (
 	"github.com/go-kit/kit/log/level"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
+	"github.com/prometheus/common/model"
 	"github.com/prometheus/common/promlog"
 	promlogflag "github.com/prometheus/common/promlog/flag"
 	"github.com/prometheus/common/route"
@@ -48,6 +49,7 @@ import (
 	"github.com/prometheus/alertmanager/inhibit"
 	"github.com/prometheus/alertmanager/nflog"
 	"github.com/prometheus/alertmanager/notify"
+	"github.com/prometheus/alertmanager/provider"
 	"github.com/prometheus/alertmanager/provider/mem"
 	"github.com/prometheus/alertmanager/silence"
 	"github.com/prometheus/alertmanager/template"
@@ -108,27 +110,54 @@ func instrumentHandler(handlerName string, handler http.HandlerFunc) http.Handle
 	)
 }
 
-func newAlertMetricByState(marker types.Marker, st types.AlertState) prometheus.GaugeFunc {
+// newAlertsMetric returns an "alertmanager_alerts" gauge carrying the constant
+// label state=<state>; its value is obtained by calling function.
+func newAlertsMetric(state string, function func() float64) prometheus.GaugeFunc {
 	return prometheus.NewGaugeFunc(
 		prometheus.GaugeOpts{
 			Name:        "alertmanager_alerts",
 			Help:        "How many alerts by state.",
-			ConstLabels: prometheus.Labels{"state": string(st)},
-		},
-		func() float64 {
-			return float64(marker.Count(st))
+			ConstLabels: prometheus.Labels{"state": state},
 		},
+		function,
 	)
 }
 
+// newMarkerMetricByState returns the "alertmanager_alerts" gauge for state st,
+// backed by marker.Count(st).
+func newMarkerMetricByState(marker types.Marker, st types.AlertState) prometheus.GaugeFunc {
+	return newAlertsMetric(string(st), func() float64 {
+		return float64(marker.Count(st))
+	})
+}
+
 func newMarkerMetrics(marker types.Marker) {
-	alertsActive = newAlertMetricByState(marker, types.AlertStateActive)
-	alertsSuppressed = newAlertMetricByState(marker, types.AlertStateSuppressed)
+	alertsActive = newMarkerMetricByState(marker, types.AlertStateActive)
+	alertsSuppressed = newMarkerMetricByState(marker, types.AlertStateSuppressed)
 
 	prometheus.MustRegister(alertsActive)
 	prometheus.MustRegister(alertsSuppressed)
 }
 
+// newAlertsMetricByState returns the "alertmanager_alerts" gauge for the given
+// alert status, backed by alerts.CountPending(status).
+func newAlertsMetricByState(alerts provider.Alerts, status model.AlertStatus) prometheus.GaugeFunc {
+	return newAlertsMetric(string(status), func() float64 {
+		return float64(alerts.CountPending(status))
+	})
+}
+
+// newAlertsMetrics builds and registers the firing and resolved gauges. They
+// share the "alertmanager_alerts" name with the marker-based gauges and
+// differ from them only in the value of the "state" label.
+func newAlertsMetrics(alerts provider.Alerts) {
+	alertsFiring := newAlertsMetricByState(alerts, model.AlertFiring)
+	alertsResolved := newAlertsMetricByState(alerts, model.AlertResolved)
+
+	prometheus.MustRegister(alertsFiring)
+	prometheus.MustRegister(alertsResolved)
+}
+
 const defaultClusterAddr = "0.0.0.0:9094"
 
 func main() {
@@ -283,6 +312,7 @@ func run() int {
 		return 1
 	}
 	defer alerts.Close()
+	newAlertsMetrics(alerts)
 
 	var (
 		inhibitor *inhibit.Inhibitor
diff --git a/inhibit/inhibit_test.go b/inhibit/inhibit_test.go
index d5863c4fde..911eadb6fe 100644
--- a/inhibit/inhibit_test.go
+++ b/inhibit/inhibit_test.go
@@ -212,6 +212,7 @@ func newFakeAlerts(alerts []*types.Alert) *fakeAlerts {
 }
 
 func (f *fakeAlerts) GetPending() provider.AlertIterator          { return nil }
+func (f *fakeAlerts) CountPending(model.AlertStatus) int          { return 0 }
 func (f *fakeAlerts) Get(model.Fingerprint) (*types.Alert, error) { return nil, nil }
 func (f *fakeAlerts) Put(...*types.Alert) error                   { return nil }
 func (f *fakeAlerts) Subscribe() provider.AlertIterator {
diff --git a/provider/mem/mem.go b/provider/mem/mem.go
index 0b9097977a..a8ba3fa444 100644
--- a/provider/mem/mem.go
+++ b/provider/mem/mem.go
@@ -140,6 +140,24 @@ func (a *Alerts) GetPending() provider.AlertIterator {
 	return provider.NewAlertIterator(ch, done, nil)
 }
 
+// CountPending returns the number of pending alerts whose status equals the
+// given status. Counting stops early if the iterator reports an error.
+func (a *Alerts) CountPending(status model.AlertStatus) int {
+	count := 0
+	alerts := a.GetPending()
+	defer alerts.Close()
+
+	for alert := range alerts.Next() {
+		if err := alerts.Err(); err != nil {
+			break
+		}
+		if alert.Status() == status {
+			count++
+		}
+	}
+	return count
+}
+
 // Get returns the alert for a given fingerprint.
 func (a *Alerts) Get(fp model.Fingerprint) (*types.Alert, error) {
 	return a.alerts.Get(fp)
diff --git a/provider/mem/mem_test.go b/provider/mem/mem_test.go
index 2414c49f9d..9f2ee4ea7d 100644
--- a/provider/mem/mem_test.go
+++ b/provider/mem/mem_test.go
@@ -290,6 +290,48 @@ func TestAlertsGetPending(t *testing.T) {
 	}
 }
 
+func TestAlertsCountPending(t *testing.T) {
+	marker := types.NewMarker()
+	alerts, err := NewAlerts(context.Background(), marker, 30*time.Minute, log.NewNopLogger())
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	checkCount := func(status model.AlertStatus, expected int) {
+		t.Helper()
+		count := alerts.CountPending(status)
+		if count != expected {
+			t.Errorf("Unexpected alert count %d instead of %d for status '%s'", count, expected, status)
+		}
+	}
+
+	tResolved := time.Now().Add(-time.Second)
+	tFiring := time.Now().Add(time.Second)
+
+	a1 := types.Alert(*alert1)
+	a2 := types.Alert(*alert2)
+
+	a1.EndsAt = tFiring
+	a2.EndsAt = tFiring
+
+	checkCount(model.AlertResolved, 0)
+	checkCount(model.AlertFiring, 0)
+
+	if err := alerts.Put(&a1, &a2); err != nil {
+		t.Fatalf("Insert failed: %s", err)
+	}
+
+	checkCount(model.AlertResolved, 0)
+	checkCount(model.AlertFiring, 2)
+
+	// No second Put: the checks below assume the provider still refers to
+	// the &a1 it was handed, so this mutation is visible to CountPending.
+	a1.EndsAt = tResolved
+
+	checkCount(model.AlertResolved, 1)
+	checkCount(model.AlertFiring, 1)
+}
+
 func TestAlertsGC(t *testing.T) {
 	marker := types.NewMarker()
 	alerts, err := NewAlerts(context.Background(), marker, 200*time.Millisecond, log.NewNopLogger())
diff --git a/provider/provider.go b/provider/provider.go
index 1ab5fbe93f..5802b6d88b 100644
--- a/provider/provider.go
+++ b/provider/provider.go
@@ -81,6 +81,8 @@ type Alerts interface {
 	// GetPending returns an iterator over all alerts that have
 	// pending notifications.
 	GetPending() AlertIterator
+	// CountPending counts pending alerts matching the given status.
+	CountPending(model.AlertStatus) int
 	// Get returns the alert for a given fingerprint.
 	Get(model.Fingerprint) (*types.Alert, error)
 	// Put adds the given alert to the set.