Skip to content

Commit

Permalink
Add "firing" and "resolved" status to "alertmanager_alerts" metric
Browse files Browse the repository at this point in the history
see #1439

Signed-off-by: Patrick Harböck <patrick.harboeck@tngtech.com>
  • Loading branch information
Pharb committed Dec 16, 2018
1 parent 9a11673 commit 65c97b3
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 10 deletions.
5 changes: 2 additions & 3 deletions api/v1/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,8 @@ func newFakeAlerts(alerts []*types.Alert, withErr bool) *fakeAlerts {

// Subscribe is a no-op stub on the fake; it always returns a nil iterator.
func (f *fakeAlerts) Subscribe() provider.AlertIterator {
	return nil
}
// Get is a no-op stub on the fake; it ignores the fingerprint and returns
// a nil alert together with a nil error.
func (f *fakeAlerts) Get(model.Fingerprint) (*types.Alert, error) {
	return nil, nil
}
// Put ignores the supplied alerts and returns the error held in f.err.
func (f *fakeAlerts) Put(alerts ...*types.Alert) error { return f.err }
// Put ignores the supplied alerts and returns the error held in f.err.
func (f *fakeAlerts) Put(alerts ...*types.Alert) error {
	return f.err
}
// CountPending is a stub on the fake; it ignores the status and always
// returns -1.
func (f *fakeAlerts) CountPending(model.AlertStatus) int {
	return -1
}
func (f *fakeAlerts) GetPending() provider.AlertIterator {
ch := make(chan *types.Alert)
done := make(chan struct{})
Expand Down
35 changes: 28 additions & 7 deletions cmd/alertmanager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/go-kit/kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promlog"
promlogflag "github.com/prometheus/common/promlog/flag"
"github.com/prometheus/common/route"
Expand All @@ -48,6 +49,7 @@ import (
"github.com/prometheus/alertmanager/inhibit"
"github.com/prometheus/alertmanager/nflog"
"github.com/prometheus/alertmanager/notify"
"github.com/prometheus/alertmanager/provider"
"github.com/prometheus/alertmanager/provider/mem"
"github.com/prometheus/alertmanager/silence"
"github.com/prometheus/alertmanager/template"
Expand Down Expand Up @@ -108,27 +110,45 @@ func instrumentHandler(handlerName string, handler http.HandlerFunc) http.Handle
)
}

func newAlertMetricByState(marker types.Marker, st types.AlertState) prometheus.GaugeFunc {
func newAlertsMetric(state string, function func() float64) prometheus.GaugeFunc {
return prometheus.NewGaugeFunc(
prometheus.GaugeOpts{
Name: "alertmanager_alerts",
Help: "How many alerts by state.",
ConstLabels: prometheus.Labels{"state": string(st)},
},
func() float64 {
return float64(marker.Count(st))
ConstLabels: prometheus.Labels{"state": state},
},
function,
)
}

// newMarkerMetricByState returns a gauge, built through newAlertsMetric, whose
// state is string(st) and whose value is marker.Count(st) converted to
// float64 each time the gauge is read.
func newMarkerMetricByState(marker types.Marker, st types.AlertState) prometheus.GaugeFunc {
	countForState := func() float64 { return float64(marker.Count(st)) }
	return newAlertsMetric(string(st), countForState)
}

// newMarkerMetrics builds one gauge for types.AlertStateActive and one for
// types.AlertStateSuppressed from marker, stores them in alertsActive and
// alertsSuppressed, and registers both via prometheus.MustRegister.
func newMarkerMetrics(marker types.Marker) {
// NOTE(review): these two assignments are overwritten by the two that follow;
// they look like the removed side of the diff — confirm which pair is intended.
alertsActive = newAlertMetricByState(marker, types.AlertStateActive)
alertsSuppressed = newAlertMetricByState(marker, types.AlertStateSuppressed)
alertsActive = newMarkerMetricByState(marker, types.AlertStateActive)
alertsSuppressed = newMarkerMetricByState(marker, types.AlertStateSuppressed)

prometheus.MustRegister(alertsActive)
prometheus.MustRegister(alertsSuppressed)
}

// newAlertsMetricByState returns a gauge, built through newAlertsMetric, whose
// state is string(status) and whose value is alerts.CountPending(status)
// converted to float64 each time the gauge is read.
func newAlertsMetricByState(alerts provider.Alerts, status model.AlertStatus) prometheus.GaugeFunc {
	countForStatus := func() float64 { return float64(alerts.CountPending(status)) }
	return newAlertsMetric(string(status), countForStatus)
}

// newAlertsMetrics creates the gauges for model.AlertFiring and
// model.AlertResolved backed by alerts, then registers them, in that order,
// via prometheus.MustRegister.
func newAlertsMetrics(alerts provider.Alerts) {
	gauges := []prometheus.GaugeFunc{
		newAlertsMetricByState(alerts, model.AlertFiring),
		newAlertsMetricByState(alerts, model.AlertResolved),
	}

	for _, g := range gauges {
		prometheus.MustRegister(g)
	}
}

// defaultClusterAddr is the address string "0.0.0.0:9094".
// NOTE(review): presumably the default listen address for clustering; its use
// is not visible in this excerpt — confirm.
const defaultClusterAddr = "0.0.0.0:9094"

func main() {
Expand Down Expand Up @@ -283,6 +303,7 @@ func run() int {
return 1
}
defer alerts.Close()
newAlertsMetrics(alerts)

var (
inhibitor *inhibit.Inhibitor
Expand Down
1 change: 1 addition & 0 deletions inhibit/inhibit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ func newFakeAlerts(alerts []*types.Alert) *fakeAlerts {
}

// GetPending is a no-op stub on the fake; it always returns a nil iterator.
func (f *fakeAlerts) GetPending() provider.AlertIterator {
	return nil
}
// CountPending is a stub on the fake; it ignores the status and always
// returns 0.
func (f *fakeAlerts) CountPending(model.AlertStatus) int {
	return 0
}
// Get is a no-op stub on the fake; it ignores the fingerprint and returns
// a nil alert together with a nil error.
func (f *fakeAlerts) Get(model.Fingerprint) (*types.Alert, error) {
	return nil, nil
}
// Put is a no-op stub on the fake; it discards the alerts and returns nil.
func (f *fakeAlerts) Put(...*types.Alert) error {
	return nil
}
func (f *fakeAlerts) Subscribe() provider.AlertIterator {
Expand Down
16 changes: 16 additions & 0 deletions provider/mem/mem.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,22 @@ func (a *Alerts) GetPending() provider.AlertIterator {
return provider.NewAlertIterator(ch, done, nil)
}

// CountPending returns how many of the alerts yielded by GetPending have a
// Status() equal to status. Iteration stops at the first iterator error, in
// which case the count accumulated so far is returned. The iterator is closed
// before returning.
func (a *Alerts) CountPending(status model.AlertStatus) int {
	it := a.GetPending()
	defer it.Close()

	var n int
	for alert := range it.Next() {
		if it.Err() != nil {
			break
		}
		if alert.Status() != status {
			continue
		}
		n++
	}
	return n
}

// Get returns the alert for a given fingerprint.
func (a *Alerts) Get(fp model.Fingerprint) (*types.Alert, error) {
return a.alerts.Get(fp)
Expand Down
39 changes: 39 additions & 0 deletions provider/mem/mem_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,45 @@ func TestAlertsGetPending(t *testing.T) {
}
}

// TestAlertsCountPending checks the per-status results of CountPending in
// three stages: on a freshly created store, after inserting two alerts whose
// EndsAt lies in the future, and after moving one alert's EndsAt into the past.
func TestAlertsCountPending(t *testing.T) {
	marker := types.NewMarker()
	alerts, err := NewAlerts(context.Background(), marker, 30*time.Minute, log.NewNopLogger())
	if err != nil {
		t.Fatal(err)
	}

	// checkCount asserts that CountPending(status) equals expected. It is
	// marked as a helper so failures are reported at the calling line.
	checkCount := func(status model.AlertStatus, expected int) {
		t.Helper()
		if count := alerts.CountPending(status); count != expected {
			t.Errorf("Unexpected alert count %d instead of %d for status '%s'", count, expected, status)
		}
	}

	// An EndsAt one second in the past / future is used to produce the
	// resolved / firing status expected by the assertions below.
	tResolved := time.Now().Add(-time.Second)
	tFiring := time.Now().Add(time.Second)

	// Work on copies so the shared alert1/alert2 fixtures are not modified.
	a1 := types.Alert(*alert1)
	a2 := types.Alert(*alert2)

	a1.EndsAt = tFiring
	a2.EndsAt = tFiring

	// Nothing has been inserted yet.
	checkCount(model.AlertResolved, 0)
	checkCount(model.AlertFiring, 0)

	if err := alerts.Put(&a1, &a2); err != nil {
		t.Fatalf("Insert failed: %s", err)
	}

	checkCount(model.AlertResolved, 0)
	checkCount(model.AlertFiring, 2)

	// No second Put: the test expects this change to be visible through the
	// pointer that was handed to Put above.
	a1.EndsAt = tResolved

	checkCount(model.AlertResolved, 1)
	checkCount(model.AlertFiring, 1)
}

func TestAlertsGC(t *testing.T) {
marker := types.NewMarker()
alerts, err := NewAlerts(context.Background(), marker, 200*time.Millisecond, log.NewNopLogger())
Expand Down
2 changes: 2 additions & 0 deletions provider/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ type Alerts interface {
// GetPending returns an iterator over all alerts that have
// pending notifications.
GetPending() AlertIterator
// CountPending returns the number of pending alerts that have the given status.
CountPending(model.AlertStatus) int
// Get returns the alert for a given fingerprint.
Get(model.Fingerprint) (*types.Alert, error)
// Put adds the given alert to the set.
Expand Down

0 comments on commit 65c97b3

Please sign in to comment.