Skip to content

Commit

Permalink
feat: make the health minions metrics configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
Loïc Yavercovski committed Apr 14, 2023
1 parent f21c294 commit df3e0f3
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 6 deletions.
7 changes: 7 additions & 0 deletions internal/metrics/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package metrics

// MetricsConfig carries the settings that control the per-minion health
// metric. ExposeMetrics receives it by value and consults it for every
// event before updating the health gauge.
type MetricsConfig struct {
// HealthMinions enables the health gauge; when false the gauge is never
// updated, whatever the filters contain.
HealthMinions bool
// HealthFunctionsFilters lists the function names (compared with exact
// string equality against event.Data.Fun) that may update the gauge.
HealthFunctionsFilters []string
// HealthStatesFilters lists the state names (exact string equality) that
// may update the gauge. An event must match both filter lists.
HealthStatesFilters []string
}
26 changes: 21 additions & 5 deletions internal/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,17 @@ func boolToFloat64(b bool) float64 {
return 0.0
}

func ExposeMetrics(ctx context.Context, eventChan <-chan events.SaltEvent) {
// contains reports whether str occurs in slice. Elements are compared with
// exact, case-sensitive string equality; a nil or empty slice never matches.
func contains(slice []string, str string) bool {
	for i := 0; i < len(slice); i++ {
		if slice[i] != str {
			continue
		}
		return true
	}
	return false
}

func ExposeMetrics(ctx context.Context, eventChan <-chan events.SaltEvent, metricsConfig MetricsConfig) {
newJobCounter := promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "salt_new_job_total",
Expand Down Expand Up @@ -97,15 +107,21 @@ func ExposeMetrics(ctx context.Context, eventChan <-chan events.SaltEvent) {
event.Data.Id,
success,
).Inc()
lastStateHealth.WithLabelValues(
event.Data.Id,
event.Data.Fun,
state).Set(boolToFloat64(event.Data.Success))
functionResponsesCounter.WithLabelValues(
event.Data.Fun,
state,
success,
).Inc()

if metricsConfig.HealthMinions {
if contains(metricsConfig.HealthFunctionsFilters, event.Data.Fun) &&
contains(metricsConfig.HealthStatesFilters, state) {
lastStateHealth.WithLabelValues(
event.Data.Id,
event.Data.Fun,
state).Set(boolToFloat64(event.Data.Success))
}
}
}
}

Expand Down
19 changes: 18 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"net/http"
"os"
"os/signal"
"strings"
"syscall"

"github.com/kpetremann/salt-exporter/internal/logging"
Expand All @@ -28,6 +29,11 @@ func main() {
tlsEnabled := flag.Bool("tls", false, "enable TLS")
tlsCert := flag.String("tls-cert", "", "TLS certificated")
tlsKey := flag.String("tls-key", "", "TLS private key")
healthMinions := flag.Bool("health-minions", true, "Enable health metric for each minion")
healthFunctionsFilters := flag.String("health-functions-filter", "state.highstate",
"Apply filter on functions to monitor, separated by a comma")
healthStatesFilters := flag.String("health-states-filter", "highstate",
"Apply filter on states to monitor, separated by a comma")
flag.Parse()

logging.ConfigureLogging()
Expand All @@ -47,6 +53,17 @@ func main() {
}
}

var metricsConfig metrics.MetricsConfig
metricsConfig.HealthMinions = *healthMinions
metricsConfig.HealthFunctionsFilters = strings.Split(*healthFunctionsFilters, ",")
metricsConfig.HealthStatesFilters = strings.Split(*healthStatesFilters, ",")

if metricsConfig.HealthMinions {
log.Info().Msg("health-minions: metrics are enabled")
log.Info().Msg("health-minions: functions filters: " + *healthFunctionsFilters)
log.Info().Msg("health-minions: states filters: " + *healthStatesFilters)
}

listenSocket := fmt.Sprint(*listenAddress, ":", *listenPort)

ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
Expand All @@ -59,7 +76,7 @@ func main() {
eventListener := events.NewEventListener(ctx, eventChan)

go eventListener.ListenEvents()
go metrics.ExposeMetrics(ctx, eventChan)
go metrics.ExposeMetrics(ctx, eventChan, metricsConfig)

// start http server
log.Info().Msg("exposing metrics on " + listenSocket + "/metrics")
Expand Down
20 changes: 20 additions & 0 deletions prometheus_alerts/highstate.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Prometheus alerting rules that flag minions whose salt_state_health series
# for function="state.highstate" / state="highstate" has the value 0.
# NOTE(review): presumably 0 means the last highstate failed (the exporter sets
# its health gauge from the event's success flag) - confirm the metric name
# mapping in the exporter.
# The label selectors below match the exporter's default -health-functions-filter
# ("state.highstate") and -health-states-filter ("highstate") values; if those
# flags are changed, the selectors here need to follow.
groups:
- name: saltstack
rules:
# Critical variant: fires once the condition has held continuously for 60 minutes.
- alert: SaltExporterLastHighstateSuccess
# "== 0" keeps only series whose value is 0; "sum by(minion)" yields one result per minion.
expr: sum by(minion) (salt_state_health{function="state.highstate", state="highstate"} == 0)
for: 60m
labels:
severity: critical
minion: "{{ $labels.minion }}"
annotations:
summary: "Salt Last Successful Highstate Failed (minion {{ $labels.minion }})"
description: "Salt Last Successful Highstate failed since > 60m"
# Informational variant: same expression, fires after 10 minutes; it has no description annotation.
- alert: SaltExporterLastHighstateSuccessInfo
expr: sum by(minion) (salt_state_health{function="state.highstate", state="highstate"} == 0)
for: 10m
labels:
severity: info
minion: "{{ $labels.minion }}"
annotations:
summary: "Salt Last Successful Highstate Failed (minion {{ $labels.minion }})"

0 comments on commit df3e0f3

Please sign in to comment.