Skip to content

Commit

Permalink
Merge pull request #203 from raypinto/emit-healthz-metrics
Browse files Browse the repository at this point in the history
emit healthz metrics
  • Loading branch information
wallyqs authored Feb 2, 2023
2 parents 70ecd92 + c319f95 commit f19a6c9
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 4 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ The Prometheus NATS Exporter consists of both a package and an application that
exports [NATS server](http://nats.io/documentation/server/gnatsd-intro) metrics
to [Prometheus](https://prometheus.io/) for monitoring. The exporter aggregates
metrics from the server monitoring endpoints you choose (varz, connz, subz,
routez) from a NATS server into a single Prometheus exporter endpoint.
routez, healthz) from a NATS server into a single Prometheus exporter endpoint.

# Build
``` bash
Expand Down Expand Up @@ -55,6 +55,8 @@ prometheus-nats-exporter <flags> url
Get streaming channel metrics.
-connz
Get connection metrics.
-healthz
Get health metrics.
-gatewayz
Get gateway metrics.
-leafz
Expand Down
5 changes: 4 additions & 1 deletion collector/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -405,11 +405,14 @@ func boolToFloat(b bool) float64 {
}

// NewCollector creates a new NATS Collector from a list of monitoring URLs.
// Each URL should be to a specific endpoint (e.g. varz, connz, subsz, or routez)
// Each URL should be to a specific endpoint (e.g. varz, connz, healthz, subsz, or routez)
func NewCollector(system, endpoint, prefix string, servers []*CollectedServer) prometheus.Collector {
if isStreamingEndpoint(system, endpoint) {
return newStreamingCollector(getSystem(system, prefix), endpoint, servers)
}
if isHealthzEndpoint(system, endpoint) {
return newHealthzCollector(getSystem(system, prefix), endpoint, servers)
}
if isConnzEndpoint(system, endpoint) {
return newConnzCollector(getSystem(system, prefix), endpoint, servers)
}
Expand Down
5 changes: 5 additions & 0 deletions collector/collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,11 @@ func TestAllEndpoints(t *testing.T) {
"gnatsd_connz_total_connections": 1,
}
verifyCollector(CoreSystem, url, "connz", cases, t)

cases = map[string]float64{
"gnatsd_healthz_status": 0,
}
verifyCollector(CoreSystem, url, "healthz", cases, t)
}

const (
Expand Down
85 changes: 85 additions & 0 deletions collector/healthz.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright 2023 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package collector has various collector utilities and implementations.
package collector

import (
"net/http"
"sync"

"github.com/prometheus/client_golang/prometheus"
)

// isHealthzEndpoint reports whether the given system/endpoint pair refers to
// the "healthz" endpoint of the core system.
func isHealthzEndpoint(system, endpoint string) bool {
	if system != CoreSystem {
		return false
	}
	return endpoint == "healthz"
}

// healthzCollector holds the state needed to poll the healthz endpoint of a
// list of servers and report the result as a single status metric.
type healthzCollector struct {
	// Embedded mutex; none of the methods in this file lock it.
	// NOTE(review): presumably kept for parity with other collectors — confirm.
	sync.Mutex

	// httpClient is handed to getMetricURL for every request made by Collect.
	httpClient *http.Client
	// servers are the targets to poll; newHealthzCollector stores each URL
	// with the endpoint name already appended.
	servers []*CollectedServer

	// status describes the emitted metric, which carries a server_id label.
	status *prometheus.Desc
}

// newHealthzCollector returns a collector that reports a "status" metric,
// named from system and endpoint and labelled by server_id, for each of the
// given servers.
//
// Every entry of servers is copied into a fresh CollectedServer whose URL is
// the original URL with endpoint appended directly (no separator is added),
// so the caller's entries are never modified.
func newHealthzCollector(system, endpoint string, servers []*CollectedServer) prometheus.Collector {
	targets := make([]*CollectedServer, 0, len(servers))
	for _, srv := range servers {
		targets = append(targets, &CollectedServer{
			ID:  srv.ID,
			URL: srv.URL + endpoint,
		})
	}

	statusDesc := prometheus.NewDesc(
		prometheus.BuildFQName(system, endpoint, "status"),
		"status",
		[]string{"server_id"},
		nil,
	)

	return &healthzCollector{
		// NOTE(review): http.DefaultClient sets no Timeout, so a server that
		// never answers can stall a scrape — confirm this is acceptable.
		httpClient: http.DefaultClient,
		servers:    targets,
		status:     statusDesc,
	}
}

// Describe sends the descriptor of the single status metric on ch.
func (nc *healthzCollector) Describe(ch chan<- *prometheus.Desc) {
	desc := nc.status
	ch <- desc
}

// Collect queries the healthz URL of every configured server and sends one
// gauge sample per server on ch, labelled with the server ID. The value is 0
// when the decoded status is "ok" and 1 for any other status.
//
// A server for which getMetricURL returns an error is reported through Debugf
// and skipped, so no sample is emitted for it.
func (nc *healthzCollector) Collect(ch chan<- prometheus.Metric) {
	for _, srv := range nc.servers {
		var resp Healthz
		err := getMetricURL(nc.httpClient, srv.URL, &resp)
		if err != nil {
			Debugf("ignoring server %s: %v", srv.ID, err)
			continue
		}

		// Zero means healthy; anything other than "ok" is reported as one.
		value := 1.0
		if resp.Status == "ok" {
			value = 0
		}

		ch <- prometheus.MustNewConstMetric(nc.status, prometheus.GaugeValue, value, srv.ID)
	}
}

// Healthz is the JSON document decoded from a server's healthz endpoint.
type Healthz struct {
	// Status is the reported health state; Collect treats "ok" as healthy
	// and every other value as unhealthy.
	Status string `json:"status"`
	// Error is decoded from the optional "error" field; nothing in this file
	// reads it.
	Error string `json:"error,omitempty"`
}
6 changes: 5 additions & 1 deletion exporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ type NATSExporterOptions struct {
ListenAddress string
ListenPort int
ScrapePath string
GetHealthz bool
GetConnz bool
GetVarz bool
GetSubz bool
Expand Down Expand Up @@ -173,7 +174,7 @@ func (ne *NATSExporter) InitializeCollectors() error {
}

getJsz := opts.GetJszFilter != ""
if !opts.GetConnz && !opts.GetRoutez && !opts.GetSubz && !opts.GetVarz &&
if !opts.GetHealthz && !opts.GetConnz && !opts.GetRoutez && !opts.GetSubz && !opts.GetVarz &&
!opts.GetGatewayz && !opts.GetLeafz && !opts.GetStreamingChannelz &&
!opts.GetStreamingServerz && !opts.GetReplicatorVarz && !getJsz {
return fmt.Errorf("no Collectors specfied")
Expand All @@ -187,6 +188,9 @@ func (ne *NATSExporter) InitializeCollectors() error {
if opts.GetVarz {
ne.createCollector(collector.CoreSystem, "varz")
}
if opts.GetHealthz {
ne.createCollector(collector.CoreSystem, "healthz")
}
if opts.GetConnz {
ne.createCollector(collector.CoreSystem, "connz")
}
Expand Down
3 changes: 3 additions & 0 deletions exporter/exporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ func TestExporter(t *testing.T) {
opts.ListenPort = 0
opts.GetVarz = true
opts.GetConnz = true
opts.GetHealthz = true
opts.GetSubz = true
opts.GetGatewayz = true
opts.GetLeafz = true
Expand Down Expand Up @@ -265,6 +266,7 @@ func TestExporterScrapePathOption(t *testing.T) {
opts.ScrapePath = "/some/other/path/to/metrics"
opts.GetVarz = true
opts.GetConnz = true
opts.GetHealthz = true
opts.GetSubz = true
opts.GetRoutez = true

Expand Down Expand Up @@ -556,6 +558,7 @@ func TestExporterBasicAuth(t *testing.T) {
opts.ListenPort = 0
opts.GetVarz = true
opts.GetConnz = true
opts.GetHealthz = true
opts.GetSubz = true
opts.GetRoutez = true

Expand Down
3 changes: 2 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ func updateOptions(debugAndTrace, useSysLog bool, opts *exporter.NATSExporterOpt
opts.LogType = collector.RemoteSysLogType
}

metricsSpecified := opts.GetConnz || opts.GetVarz || opts.GetSubz ||
metricsSpecified := opts.GetConnz || opts.GetVarz || opts.GetSubz || opts.GetHealthz ||
opts.GetRoutez || opts.GetGatewayz || opts.GetLeafz || opts.GetStreamingChannelz ||
opts.GetStreamingServerz || opts.GetReplicatorVarz || opts.GetJszFilter == ""
if !metricsSpecified {
Expand Down Expand Up @@ -111,6 +111,7 @@ func main() {
flag.BoolVar(&opts.Trace, "V", false, "Enable trace log level.")
flag.BoolVar(&debugAndTrace, "DV", false, "Enable debug and trace log levels.")
flag.BoolVar(&opts.GetConnz, "connz", false, "Get connection metrics.")
flag.BoolVar(&opts.GetHealthz, "healthz", false, "Get health metrics.")
flag.BoolVar(&opts.GetReplicatorVarz, "replicatorVarz", false, "Get replicator general metrics.")
flag.BoolVar(&opts.GetGatewayz, "gatewayz", false, "Get gateway metrics.")
flag.BoolVar(&opts.GetLeafz, "leafz", false, "Get leaf metrics.")
Expand Down

0 comments on commit f19a6c9

Please sign in to comment.