-
Notifications
You must be signed in to change notification settings - Fork 2.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add runit exporter #2
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
package exporter | ||
|
||
import ( | ||
"github.com/prometheus/client_golang/prometheus" | ||
"github.com/soundcloud/go-runit/runit" | ||
) | ||
|
||
var () | ||
|
||
type runitCollector struct { | ||
name string | ||
config config | ||
serviceStatus prometheus.Gauge | ||
} | ||
|
||
func NewRunitCollector(config config, registry prometheus.Registry) (runitCollector, error) { | ||
c := runitCollector{ | ||
name: "runit_collector", | ||
config: config, | ||
serviceStatus: prometheus.NewGauge(), | ||
} | ||
|
||
registry.Register( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since you are exporting the duration, the metric name should convey that fact! :-) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Changed it to node_service_duration which still is a bit confusing. Any suggestion? The metric answers how long a service was in a specific state. It's still kinda cumbersome if you look at the labels, so happy for suggestions for a different structure in general. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. uptime? Hard to say. I think a good starting point is answering, "what do I intend to convey with this data, and how should it be used?" There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not only uptime, it's also downtime, time trying to get up etc.. I'll look into adding prometheus alerts based on those metrics and hope I'll come up with better naming on the way. |
||
"node_service_status", | ||
"node_exporter: status of runit service.", | ||
prometheus.NilLabels, | ||
c.serviceStatus, | ||
) | ||
|
||
return c, nil | ||
} | ||
|
||
func (c *runitCollector) Name() string { return c.name } | ||
|
||
func (c *runitCollector) Update() (updates int, err error) { | ||
services, err := runit.GetServices() | ||
if err != nil { | ||
return 0, err | ||
} | ||
|
||
for _, service := range services { | ||
status, err := service.Status() | ||
if err != nil { | ||
return 0, err | ||
} | ||
debug(c.Name(), "%s is %d on pid %d for %d seconds", service.Name, status.State, status.Pid, status.Duration) | ||
labels := map[string]string{ | ||
"name": service.Name, | ||
"state": runit.StateToString[status.State], | ||
"want": runit.StateToString[status.Want], | ||
} | ||
|
||
if status.NormallyUp { | ||
labels["normally_up"] = "yes" | ||
} else { | ||
labels["normally_up"] = "no" | ||
} | ||
|
||
c.serviceStatus.Set(labels, float64(status.Duration)) | ||
updates++ | ||
} | ||
return updates, err | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cruft.