From 8af84a215dd21a7d78be0a82cf88b3b5ce52250c Mon Sep 17 00:00:00 2001
From: xginn8
Date: Thu, 5 Jul 2018 07:31:45 -0400
Subject: [PATCH] Add support for NRestarts counter introduced in systemd 235 (#992)

* Add support for NRestarts counter introduced in systemd 235

`.service` units increment this counter any time the Restart= condition is
triggered.

The value is exported as the service_restart_total counter, labelled with the
unit name. When the NRestarts property cannot be read (systemd < 235), the
unit is still collected and only the restart counter is omitted for it.

Signed-off-by: Matthew McGinn
---
 CHANGELOG.md               |  2 +-
 collector/systemd_linux.go | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b9f4b153cc..35e8899b2c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
 **Breaking changes**
 
 * [CHANGE]
-* [FEATURE]
+* [FEATURE] Collect NRestarts property for systemd service units
 * [ENHANCEMENT]
 * [BUGFIX]
 
diff --git a/collector/systemd_linux.go b/collector/systemd_linux.go
index f90b49a502..e05c289aad 100644
--- a/collector/systemd_linux.go
+++ b/collector/systemd_linux.go
@@ -36,6 +36,7 @@ type systemdCollector struct {
 	unitDesc             *prometheus.Desc
 	systemRunningDesc    *prometheus.Desc
 	summaryDesc          *prometheus.Desc
+	nRestartsDesc        *prometheus.Desc
 	timerLastTriggerDesc *prometheus.Desc
 	unitWhitelistPattern *regexp.Regexp
 	unitBlacklistPattern *regexp.Regexp
@@ -63,6 +64,9 @@ func NewSystemdCollector() (Collector, error) {
 	summaryDesc := prometheus.NewDesc(
 		prometheus.BuildFQName(namespace, subsystem, "units"),
 		"Summary of systemd unit states", []string{"state"}, nil)
+	nRestartsDesc := prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, subsystem, "service_restart_total"),
+		"Service unit count of Restart triggers", []string{"name"}, nil)
 	timerLastTriggerDesc := prometheus.NewDesc(
 		prometheus.BuildFQName(namespace, subsystem, "timer_last_trigger_seconds"),
 		"Seconds since epoch of last trigger.", []string{"name"}, nil)
@@ -73,6 +77,7 @@ func NewSystemdCollector() (Collector, error) {
 		unitDesc:             unitDesc,
 		systemRunningDesc:    systemRunningDesc,
 		summaryDesc:          summaryDesc,
+		nRestartsDesc:        nRestartsDesc,
 		timerLastTriggerDesc: timerLastTriggerDesc,
 		unitWhitelistPattern: unitWhitelistPattern,
 		unitBlacklistPattern: unitBlacklistPattern,
@@ -112,6 +117,11 @@ func (c *systemdCollector) collectUnitStatusMetrics(ch chan<- prometheus.Metric,
 				c.unitDesc, prometheus.GaugeValue, isActive,
 				unit.Name, stateName)
 		}
+		if strings.HasSuffix(unit.Name, ".service") && unit.nRestarts != nil {
+			ch <- prometheus.MustNewConstMetric(
+				c.nRestartsDesc, prometheus.CounterValue,
+				float64(*unit.nRestarts), unit.Name)
+		}
 	}
 }
 
@@ -153,6 +163,7 @@ func (c *systemdCollector) newDbus() (*dbus.Conn, error) {
 type unit struct {
 	dbus.UnitStatus
 	lastTriggerUsec uint64
+	nRestarts       *uint32 // nil when the NRestarts property could not be read
 }
 
 func (c *systemdCollector) getAllUnits() ([]unit, error) {
@@ -181,6 +192,17 @@ func (c *systemdCollector) getAllUnits() ([]unit, error) {
 
 			unit.lastTriggerUsec = lastTriggerValue.Value.Value().(uint64)
 		}
+		if strings.HasSuffix(unit.Name, ".service") {
+			// NRestarts only exists on systemd >= 235. A failed lookup must not
+			// drop the unit: keep it so its state metrics are still exported,
+			// and leave nRestarts nil so no restart counter is emitted for it.
+			nRestarts, err := conn.GetUnitTypeProperty(unit.Name, "Service", "NRestarts")
+			if err != nil {
+				log.Debugf("couldn't get unit '%s' NRestarts: %s\n", unit.Name, err)
+			} else if count, ok := nRestarts.Value.Value().(uint32); ok {
+				unit.nRestarts = &count
+			}
+		}
 
 		result = append(result, unit)
 	}