Skip to content

Commit

Permalink
expose the key label
Browse files Browse the repository at this point in the history
  • Loading branch information
qinxx108 committed Jul 13, 2023
1 parent 35dda1c commit 83dd411
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 8 deletions.
16 changes: 8 additions & 8 deletions pkg/alertmanager/alertmanager_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,19 +168,19 @@ func newAlertmanagerMetrics() *alertmanagerMetrics {
partialMerges: prometheus.NewDesc(
"cortex_alertmanager_partial_state_merges_total",
"Number of times we have received a partial state to merge for a key.",
[]string{"user"}, nil),
[]string{"user", "key"}, nil),
partialMergesFailed: prometheus.NewDesc(
"cortex_alertmanager_partial_state_merges_failed_total",
"Number of times we have failed to merge a partial state received for a key.",
[]string{"user"}, nil),
[]string{"user", "key"}, nil),
replicationTotal: prometheus.NewDesc(
"cortex_alertmanager_state_replication_total",
"Number of times we have tried to replicate a state to other alertmanagers",
[]string{"user"}, nil),
[]string{"user", "key"}, nil),
replicationFailed: prometheus.NewDesc(
"cortex_alertmanager_state_replication_failed_total",
"Number of times we have failed to replicate a state to other alertmanagers",
[]string{"user"}, nil),
[]string{"user", "key"}, nil),
fetchReplicaStateTotal: prometheus.NewDesc(
"cortex_alertmanager_state_fetch_replica_state_total",
"Number of times we have tried to read and merge the full state from another replica.",
Expand Down Expand Up @@ -317,10 +317,10 @@ func (m *alertmanagerMetrics) Collect(out chan<- prometheus.Metric) {

data.SendMaxOfGaugesPerUser(out, m.configHashValue, "alertmanager_config_hash")

data.SendSumOfCountersPerUser(out, m.partialMerges, "alertmanager_partial_state_merges_total")
data.SendSumOfCountersPerUser(out, m.partialMergesFailed, "alertmanager_partial_state_merges_failed_total")
data.SendSumOfCountersPerUser(out, m.replicationTotal, "alertmanager_state_replication_total")
data.SendSumOfCountersPerUser(out, m.replicationFailed, "alertmanager_state_replication_failed_total")
data.SendSumOfCountersPerUserWithLabels(out, m.partialMerges, "alertmanager_partial_state_merges_total", "key")
data.SendSumOfCountersPerUserWithLabels(out, m.partialMergesFailed, "alertmanager_partial_state_merges_failed_total", "key")
data.SendSumOfCountersPerUserWithLabels(out, m.replicationTotal, "alertmanager_state_replication_total", "key")
data.SendSumOfCountersPerUserWithLabels(out, m.replicationFailed, "alertmanager_state_replication_failed_total", "key")
data.SendSumOfCounters(out, m.fetchReplicaStateTotal, "alertmanager_state_fetch_replica_state_total")
data.SendSumOfCounters(out, m.fetchReplicaStateFailed, "alertmanager_state_fetch_replica_state_failed_total")
data.SendSumOfCounters(out, m.initialSyncTotal, "alertmanager_state_initial_sync_total")
Expand Down
56 changes: 56 additions & 0 deletions pkg/alertmanager/alertmanager_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,17 @@ func TestAlertmanagerMetricsStore(t *testing.T) {
cortex_alertmanager_notifications_total{integration="wechat",user="user2"} 20
cortex_alertmanager_notifications_total{integration="wechat",user="user3"} 200
# HELP cortex_alertmanager_partial_state_merges_failed_total Number of times we have failed to merge a partial state received for a key.
# TYPE cortex_alertmanager_partial_state_merges_failed_total counter
cortex_alertmanager_partial_state_merges_failed_total{key="nil",user="user1"} 2
cortex_alertmanager_partial_state_merges_failed_total{key="nil",user="user2"} 20
cortex_alertmanager_partial_state_merges_failed_total{key="nil",user="user3"} 200
# HELP cortex_alertmanager_partial_state_merges_total Number of times we have received a partial state to merge for a key.
# TYPE cortex_alertmanager_partial_state_merges_total counter
cortex_alertmanager_partial_state_merges_total{key="nil",user="user1"} 3
cortex_alertmanager_partial_state_merges_total{key="nil",user="user2"} 30
cortex_alertmanager_partial_state_merges_total{key="nil",user="user3"} 300
# HELP cortex_alertmanager_silences How many silences by state.
# TYPE cortex_alertmanager_silences gauge
cortex_alertmanager_silences{state="active",user="user1"} 1
Expand Down Expand Up @@ -506,6 +517,17 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
cortex_alertmanager_notifications_total{integration="wechat",user="user2"} 20
cortex_alertmanager_notifications_total{integration="wechat",user="user3"} 200
# HELP cortex_alertmanager_partial_state_merges_failed_total Number of times we have failed to merge a partial state received for a key.
# TYPE cortex_alertmanager_partial_state_merges_failed_total counter
cortex_alertmanager_partial_state_merges_failed_total{key="nil",user="user1"} 2
cortex_alertmanager_partial_state_merges_failed_total{key="nil",user="user2"} 20
cortex_alertmanager_partial_state_merges_failed_total{key="nil",user="user3"} 200
# HELP cortex_alertmanager_partial_state_merges_total Number of times we have received a partial state to merge for a key.
# TYPE cortex_alertmanager_partial_state_merges_total counter
cortex_alertmanager_partial_state_merges_total{key="nil",user="user1"} 3
cortex_alertmanager_partial_state_merges_total{key="nil",user="user2"} 30
cortex_alertmanager_partial_state_merges_total{key="nil",user="user3"} 300
# HELP cortex_alertmanager_silences How many silences by state.
# TYPE cortex_alertmanager_silences gauge
cortex_alertmanager_silences{state="active",user="user1"} 1
Expand Down Expand Up @@ -758,6 +780,15 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
cortex_alertmanager_notifications_total{integration="wechat",user="user1"} 2
cortex_alertmanager_notifications_total{integration="wechat",user="user2"} 20
# HELP cortex_alertmanager_partial_state_merges_failed_total Number of times we have failed to merge a partial state received for a key.
# TYPE cortex_alertmanager_partial_state_merges_failed_total counter
cortex_alertmanager_partial_state_merges_failed_total{key="nil",user="user1"} 2
cortex_alertmanager_partial_state_merges_failed_total{key="nil",user="user2"} 20
# HELP cortex_alertmanager_partial_state_merges_total Number of times we have received a partial state to merge for a key.
# TYPE cortex_alertmanager_partial_state_merges_total counter
cortex_alertmanager_partial_state_merges_total{key="nil",user="user1"} 3
cortex_alertmanager_partial_state_merges_total{key="nil",user="user2"} 30
# HELP cortex_alertmanager_silences How many silences by state.
# TYPE cortex_alertmanager_silences gauge
cortex_alertmanager_silences{state="active",user="user1"} 1
Expand Down Expand Up @@ -898,6 +929,10 @@ func populateAlertmanager(base float64) *prometheus.Registry {
lm.size.Set(100 * base)
lm.insertFailures.Add(7 * base)

sr := newStateReplicationMetrics(reg)
sr.partialStateMergesFailed.WithLabelValues("nil").Add(base * 2)
sr.partialStateMergesTotal.WithLabelValues("nil").Add(base * 3)

return reg
}

Expand Down Expand Up @@ -1130,3 +1165,24 @@ func newLimiterMetrics(r prometheus.Registerer) *limiterMetrics {
insertFailures: insertAlertFailures,
}
}

// stateReplicationMetrics bundles the counter vectors used by the tests to
// populate partial-state-merge metrics on a per-tenant registry. Both vectors
// are created with a single "key" label.
type stateReplicationMetrics struct {
	// Total and failed partial state merge counters, respectively.
	partialStateMergesTotal, partialStateMergesFailed *prometheus.CounterVec
}

// newStateReplicationMetrics builds the partial-state-merge counter vectors
// through a promauto factory bound to r and returns them wrapped in a
// stateReplicationMetrics. Each counter carries a single "key" label.
func newStateReplicationMetrics(r prometheus.Registerer) *stateReplicationMetrics {
	factory := promauto.With(r)

	// The "total" counter is created before the "failed" one, matching the
	// order in which the fields are listed below.
	return &stateReplicationMetrics{
		partialStateMergesTotal: factory.NewCounterVec(
			prometheus.CounterOpts{
				Name: "alertmanager_partial_state_merges_total",
				Help: "Number of times we have received a partial state to merge for a key.",
			},
			[]string{"key"},
		),
		partialStateMergesFailed: factory.NewCounterVec(
			prometheus.CounterOpts{
				Name: "alertmanager_partial_state_merges_failed_total",
				Help: "Number of times we have failed to merge a partial state received for a key.",
			},
			[]string{"key"},
		),
	}
}

0 comments on commit 83dd411

Please sign in to comment.