Skip to content

Commit

Permalink
Add Prometheus endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
archydragon committed Mar 10, 2018
1 parent bc5b910 commit f321454
Show file tree
Hide file tree
Showing 108 changed files with 26,007 additions and 0 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Nixy is a daemon that automatically configures Nginx for web services deployed o
* Automatic service discovery of all running tasks inside Mesos/Marathon, including their health status.
* Basic auth support.
* Health checks for errors in template, nginx config or Marathon endpoints.
* Built-in Prometheus exporter for metrics and alerts.
* ....

## Compatibility
Expand Down Expand Up @@ -229,6 +230,7 @@ Latest versions of Nginx open-source comes with streaming by default. If you are
- `GET /v1/config` JSON response with all variables available inside the template.
- `GET /v1/reload` manually trigger a new config reload.
- `GET /v1/health` JSON response with health status of template, nginx config and Marathon endpoints available.
- `GET /v1/metrics` Prometheus metrics endpoint.
### Nagios Monitoring
Expand Down
16 changes: 16 additions & 0 deletions marathon.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ func eventStream() {
}
if endpoint == "" {
logger.Error("all endpoints are down")
go countAllEndpointsDownErrors.Inc()
continue
}
req, err := http.NewRequest("GET", endpoint+"/v2/events", nil)
Expand All @@ -88,6 +89,7 @@ func eventStream() {
"error": err.Error(),
"endpoint": endpoint,
}).Error("unable to create event stream request")
go countMarathonStreamErrors.Inc()
continue
}
req.Header.Set("Accept", "text/event-stream")
Expand All @@ -100,6 +102,7 @@ func eventStream() {
timer := time.AfterFunc(15*time.Second, func() {
cancel()
logger.Warn("No data for 15s, event stream request was cancelled")
go countMarathonStreamNoDataWarnings.Inc()
})
req = req.WithContext(ctx)
resp, err := client.Do(req)
Expand All @@ -108,6 +111,7 @@ func eventStream() {
"error": err.Error(),
"endpoint": endpoint,
}).Error("unable to access Marathon event stream")
go countMarathonStreamErrors.Inc()
// expire request cancellation timer immediately
timer.Reset(100 * time.Millisecond)
continue
Expand All @@ -123,6 +127,7 @@ func eventStream() {
"error": err.Error(),
"endpoint": endpoint,
}).Error("error reading Marathon event stream")
go countMarathonStreamErrors.Inc()
resp.Body.Close()
break
}
Expand All @@ -133,6 +138,7 @@ func eventStream() {
"event": strings.TrimSpace(line[6:]),
"endpoint": endpoint,
}).Info("marathon event received")
go countMarathonEventsReceived.Inc()
select {
case eventqueue <- true: // add reload to our queue channel, unless it is full of course.
default:
Expand Down Expand Up @@ -175,6 +181,7 @@ func endpointHealth() {
"error": err.Error(),
"endpoint": es.Endpoint,
}).Error("endpoint is down")
go countEndpointDownErrors.Inc()
health.Endpoints[i].Healthy = false
health.Endpoints[i].Message = err.Error()
continue
Expand All @@ -185,6 +192,7 @@ func endpointHealth() {
"status": resp.StatusCode,
"endpoint": es.Endpoint,
}).Error("endpoint check failed")
go countEndpointCheckFails.Inc()
health.Endpoints[i].Healthy = false
health.Endpoints[i].Message = resp.Status
continue
Expand Down Expand Up @@ -314,6 +322,7 @@ func syncApps(jsonapps *MarathonApps) bool {
"app": app.ID,
"subdomain": host,
}).Warn("invalid subdomain label")
go countInvalidSubdomainLabelWarnings.Inc()
}
}
// to be compatible with moxy, will probably be removed eventually.
Expand All @@ -328,6 +337,7 @@ func syncApps(jsonapps *MarathonApps) bool {
"app": app.ID,
"subdomain": host,
}).Warn("invalid subdomain label")
go countInvalidSubdomainLabelWarnings.Inc()
}
}
} else {
Expand All @@ -350,6 +360,7 @@ func syncApps(jsonapps *MarathonApps) bool {
"app": app.ID,
"subdomain": host,
}).Warn("duplicate subdomain label")
go countDuplicateSubdomainLabelWarnings.Inc()
// reset hosts if duplicate.
newapp.Hosts = nil
}
Expand Down Expand Up @@ -513,6 +524,7 @@ func reload() {
"error": err.Error(),
}).Error("unable to sync from marathon")
go statsCount("reload.failed", 1)
go countFailedReloads.Inc()
return
}
equal := syncApps(&jsonapps)
Expand All @@ -527,6 +539,7 @@ func reload() {
"error": err.Error(),
}).Error("unable to generate nginx config")
go statsCount("reload.failed", 1)
go countFailedReloads.Inc()
return
}
config.LastUpdates.LastConfigValid = time.Now()
Expand All @@ -536,6 +549,7 @@ func reload() {
"error": err.Error(),
}).Error("unable to reload nginx")
go statsCount("reload.failed", 1)
go countFailedReloads.Inc()
return
}
elapsed := time.Since(start)
Expand All @@ -544,6 +558,8 @@ func reload() {
}).Info("config updated")
go statsCount("reload.success", 1)
go statsTiming("reload.time", elapsed)
go countSuccessfulReloads.Inc()
go observeReloadTimeMetric(elapsed)
config.LastUpdates.LastNginxReload = time.Now()
return
}
3 changes: 3 additions & 0 deletions nixy.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/Sirupsen/logrus"
"github.com/gorilla/mux"
"github.com/peterbourgon/g2s"
"github.com/prometheus/client_golang/prometheus/promhttp"
)

// Task struct
Expand Down Expand Up @@ -250,11 +251,13 @@ func main() {
}).Error("unable to Dial statsd")
statsd = g2s.Noop() //fallback to Noop.
}
setupPrometheusMetrics()
mux := mux.NewRouter()
mux.HandleFunc("/", nixyVersion)
mux.HandleFunc("/v1/reload", nixyReload)
mux.HandleFunc("/v1/config", nixyConfig)
mux.HandleFunc("/v1/health", nixyHealth)
mux.Handle("/v1/metrics", promhttp.Handler())
s := &http.Server{
Addr: ":" + config.Port,
Handler: mux,
Expand Down
108 changes: 108 additions & 0 deletions prometheus.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
package main

import (
"time"

"github.com/prometheus/client_golang/prometheus"
)

// ns is passed as the Namespace of every Prometheus metric declared in this file.
const ns = "nixy"

// Prometheus collectors exposed by nixy. All of them share the ns namespace
// and are registered by setupPrometheusMetrics.
var (
	// Outcome of config reloads.
	countFailedReloads     = newNixyCounter("reloads_failed", "Total number of failed Nginx reloads")
	countSuccessfulReloads = newNixyCounter("reloads_successful", "Total number of successful Nginx reloads")

	// histogramReloadDuration receives reload durations expressed in seconds
	// (see observeReloadTimeMetric). Buckets start at 0.05 and double ten
	// times, so the largest finite bucket is 25.6.
	histogramReloadDuration = prometheus.NewHistogram(
		prometheus.HistogramOpts{
			Namespace: ns,
			Name:      "reload_duration",
			Help:      "Nginx reload duration in seconds",
			Buckets:   prometheus.ExponentialBuckets(0.05, 2, 10),
		},
	)

	// Warnings about subdomain labels found on apps.
	countInvalidSubdomainLabelWarnings   = newNixyCounter("invalid_subdomain_label_warnings", "Total number of warnings about invalid subdomain label")
	countDuplicateSubdomainLabelWarnings = newNixyCounter("duplicate_subdomain_label_warnings", "Total number of warnings about duplicate subdomain label")

	// Endpoint health problems.
	countEndpointCheckFails     = newNixyCounter("endpoint_check_fails", "Total number of endpoint check failure errors")
	countEndpointDownErrors     = newNixyCounter("endpoint_down_errors", "Total number of endpoint down errors")
	countAllEndpointsDownErrors = newNixyCounter("all_endpoints_down_errors", "Total number of all endpoints down errors")

	// Marathon event stream activity.
	countMarathonStreamErrors         = newNixyCounter("marathon_stream_errors", "Total number of Marathon stream errors")
	countMarathonStreamNoDataWarnings = newNixyCounter("marathon_stream_no_data_warnings", "Total number of warnings about no data in Marathon stream")
	countMarathonEventsReceived       = newNixyCounter("marathon_events_received", "Total number of received Marathon events")
)

// newNixyCounter builds an unregistered counter in the ns namespace with the
// given metric name and help text.
func newNixyCounter(name, help string) prometheus.Counter {
	return prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: ns,
		Name:      name,
		Help:      help,
	})
}

// setupPrometheusMetrics registers every nixy collector with the default
// Prometheus registry. MustRegister panics when a registration fails, so this
// is meant to be called once during startup.
func setupPrometheusMetrics() {
	prometheus.MustRegister(
		// Reload metrics.
		countFailedReloads,
		countSuccessfulReloads,
		histogramReloadDuration,
		// Subdomain label warnings.
		countInvalidSubdomainLabelWarnings,
		countDuplicateSubdomainLabelWarnings,
		// Endpoint health.
		countEndpointCheckFails,
		countEndpointDownErrors,
		countAllEndpointsDownErrors,
		// Marathon event stream.
		countMarathonStreamErrors,
		countMarathonStreamNoDataWarnings,
		countMarathonEventsReceived,
	)
}

// observeReloadTimeMetric records the reload duration e in the reload
// duration histogram, converted from nanoseconds to fractional seconds.
func observeReloadTimeMetric(e time.Duration) {
	const nanosPerSecond = float64(time.Second)
	seconds := float64(e) / nanosPerSecond
	histogramReloadDuration.Observe(seconds)
}
20 changes: 20 additions & 0 deletions vendor/github.com/beorn7/perks/LICENSE

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f321454

Please sign in to comment.