Skip to content

Commit

Permalink
metrics: add start label for prometheus counters
Browse files Browse the repository at this point in the history
Add a `start` label to each counter, set to the counter's creation
timestamp in Unix nanoseconds.

This enables OpenTelemetry cumulative temporality,
see https://opentelemetry.io/docs/specs/otel/metrics/data-model/#temporality

Example:
```
~$ curl -s localhost:9911/metrics | grep host_count
 # HELP skipper_serve_host_count Total number of requests of serving a host.
 # TYPE skipper_serve_host_count counter
skipper_serve_host_count{code="200",host="bar_test",method="GET",start="1717066533598500794"} 1
skipper_serve_host_count{code="200",host="foo_test",method="GET",start="1717066538031805059"} 2
```

Fixes #3087

Signed-off-by: Alexander Yastrebov <alexander.yastrebov@zalando.de>
  • Loading branch information
AlexanderYastrebov committed May 30, 2024
1 parent 34b188c commit bdb6068
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 1 deletion.
3 changes: 3 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ type Config struct {
BlockProfileRate int `yaml:"block-profile-rate"`
MutexProfileFraction int `yaml:"mutex-profile-fraction"`
MemProfileRate int `yaml:"memory-profile-rate"`
EnableStartTimestamp bool `yaml:"enable-start-timestamp"`
DebugGcMetrics bool `yaml:"debug-gc-metrics"`
RuntimeMetrics bool `yaml:"runtime-metrics"`
ServeRouteMetrics bool `yaml:"serve-route-metrics"`
Expand Down Expand Up @@ -378,6 +379,7 @@ func NewConfig() *Config {
flag.IntVar(&cfg.BlockProfileRate, "block-profile-rate", 0, "block profile sample rate, see runtime.SetBlockProfileRate")
flag.IntVar(&cfg.MutexProfileFraction, "mutex-profile-fraction", 0, "mutex profile fraction rate, see runtime.SetMutexProfileFraction")
flag.IntVar(&cfg.MemProfileRate, "memory-profile-rate", 0, "memory profile rate, see runtime.SetMemProfileRate, keeps default 512 kB")
flag.BoolVar(&cfg.EnableStartTimestamp, "enable-start-timestamp", false, "only for prometheus: adds start label to each counter with the value of counter creation timestamp as unix nanoseconds")
flag.BoolVar(&cfg.DebugGcMetrics, "debug-gc-metrics", false, "enables reporting of the Go garbage collector statistics exported in debug.GCStats")
flag.BoolVar(&cfg.RuntimeMetrics, "runtime-metrics", true, "enables reporting of the Go runtime statistics exported in runtime and specifically runtime.MemStats")
flag.BoolVar(&cfg.ServeRouteMetrics, "serve-route-metrics", false, "enables reporting total serve time metrics for each route")
Expand Down Expand Up @@ -745,6 +747,7 @@ func (c *Config) ToOptions() skipper.Options {
EnableProfile: c.EnableProfile,
BlockProfileRate: c.BlockProfileRate,
MutexProfileFraction: c.MutexProfileFraction,
EnableStartTimestamp: c.EnableStartTimestamp,
EnableDebugGcMetrics: c.DebugGcMetrics,
EnableRuntimeMetrics: c.RuntimeMetrics,
EnableServeRouteMetrics: c.ServeRouteMetrics,
Expand Down
4 changes: 4 additions & 0 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ type Options struct {
// collected metrics.
Prefix string

// EnableStartTimestamp adds start label to each counter with the value of counter creation
// timestamp as unix nanoseconds.
EnableStartTimestamp bool

// If set, garbage collector metrics are collected
// in addition to the http traffic metrics.
EnableDebugGcMetrics bool
Expand Down
29 changes: 28 additions & 1 deletion metrics/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promhttp"
dto "github.com/prometheus/client_model/go"
"google.golang.org/protobuf/proto"
)

const (
Expand Down Expand Up @@ -295,7 +297,11 @@ func (p *Prometheus) registerMetrics() {
}

// CreateHandler returns the http.Handler built by promhttp.HandlerFor for
// the metrics of p.registry.
//
// When p.opts.EnableStartTimestamp is set, the registry is wrapped in
// withStartLabelGatherer before being handed to promhttp, so the gathered
// counters carry the additional "start" label. Otherwise the registry is
// used as the gatherer directly.
func (p *Prometheus) CreateHandler() http.Handler {
	// The previous unconditional `return promhttp.HandlerFor(p.registry, ...)`
	// must not precede this selection, otherwise the option is never honored.
	var gatherer prometheus.Gatherer = p.registry
	if p.opts.EnableStartTimestamp {
		gatherer = withStartLabelGatherer{p.registry}
	}
	return promhttp.HandlerFor(gatherer, promhttp.HandlerOpts{})
}

func (p *Prometheus) getHandler() http.Handler {
Expand Down Expand Up @@ -457,3 +463,24 @@ func (p *Prometheus) IncErrorsStreaming(routeID string) {
}

// Close does nothing; the method body is empty.
// NOTE(review): presumably present to satisfy a metrics interface that
// requires Close — confirm against the interface definition.
func (p *Prometheus) Close() {}

// withStartLabelGatherer wraps a *prometheus.Registry and adds a "start"
// label to all counters with the value of counter creation timestamp as
// unix nanoseconds.
type withStartLabelGatherer struct {
	*prometheus.Registry
}

// Gather calls Gather on the embedded registry and appends a "start" label
// to every metric of each family whose type is COUNTER. Families of other
// types are left untouched.
//
// The metric families and the error returned by the registry are passed
// through together, so whatever the registry returned alongside an error is
// still labelled and returned.
func (g withStartLabelGatherer) Gather() ([]*dto.MetricFamily, error) {
	metricFamilies, err := g.Registry.Gather()
	for _, metricFamily := range metricFamilies {
		if metricFamily.GetType() != dto.MetricType_COUNTER {
			continue
		}
		for _, metric := range metricFamily.GetMetric() {
			// Use the generated getters instead of direct field access: they
			// tolerate a nil Counter or a nil CreatedTimestamp, in which case
			// AsTime yields the Unix epoch and the label value becomes "0"
			// rather than panicking in the middle of a scrape.
			created := metric.GetCounter().GetCreatedTimestamp().AsTime().UnixNano()
			metric.Label = append(metric.Label, &dto.LabelPair{
				Name:  proto.String("start"),
				Value: proto.String(fmt.Sprintf("%d", created)),
			})
		}
	}
	return metricFamilies, err
}
62 changes: 62 additions & 0 deletions metrics/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,14 @@ import (
"io"
"net/http"
"net/http/httptest"
"regexp"
"strconv"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/zalando/skipper/metrics"
)

Expand Down Expand Up @@ -1053,3 +1057,61 @@ func TestPrometheusMetrics(t *testing.T) {
})
}
}

// TestPrometheusMetricsStartTimestamp checks that, with EnableStartTimestamp
// set, the counters served by the metrics handler carry a "start" label whose
// value parses as an integer lying between the instants taken just before
// and just after the counters were incremented.
func TestPrometheusMetricsStartTimestamp(t *testing.T) {
	const metricsPath = "/awesome-metrics"

	prom := metrics.NewPrometheus(metrics.Options{
		EnableStartTimestamp:   true,
		EnableServeHostCounter: true,
	})

	serveMux := http.NewServeMux()
	prom.RegisterHandler(metricsPath, serveMux)

	before := time.Now()

	// One request for foo.test, two for bar.test.
	served := []struct {
		host   string
		method string
		code   int
	}{
		{"foo.test", "GET", 200},
		{"bar.test", "POST", 201},
		{"bar.test", "POST", 201},
	}
	for _, s := range served {
		prom.MeasureServe("route1", s.host, s.method, s.code, time.Now().Add(-15*time.Millisecond))
	}
	for i := 0; i < 3; i++ {
		prom.IncRoutingFailures()
	}

	after := time.Now()

	recorder := httptest.NewRecorder()
	serveMux.ServeHTTP(recorder, httptest.NewRequest("GET", metricsPath, nil))

	resp := recorder.Result()
	require.Equal(t, http.StatusOK, resp.StatusCode)

	body, err := io.ReadAll(resp.Body)
	require.NoError(t, err)

	t.Logf("Metrics response:\n%s", body)

	lowerBound, upperBound := before.UnixNano(), after.UnixNano()

	// The Prometheus client offers no way to mock counter creation
	// timestamps, see https://github.com/prometheus/client_golang/issues/1354
	//
	// assertStartWithin therefore only verifies that the captured timestamp
	// falls into the [before, after] range.
	assertStartWithin := func(pattern string) {
		t.Helper()

		groups := regexp.MustCompile(pattern).FindSubmatch(body)
		require.NotNil(t, groups, "Metrics response does not match: %s", pattern)
		require.Len(t, groups, 2)

		start, parseErr := strconv.ParseInt(string(groups[1]), 10, 64)
		require.NoError(t, parseErr)

		assert.GreaterOrEqual(t, start, lowerBound)
		assert.LessOrEqual(t, start, upperBound)
	}

	assertStartWithin(`skipper_serve_host_count{code="200",host="foo_test",method="GET",start="(\d+)"} 1`)
	assertStartWithin(`skipper_serve_host_count{code="201",host="bar_test",method="POST",start="(\d+)"} 2`)
	assertStartWithin(`skipper_route_error_total{start="(\d+)"} 3`)
}
5 changes: 5 additions & 0 deletions skipper.go
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,10 @@ type Options struct {
// MemProfileRate calls runtime.SetMemProfileRate(MemProfileRate) if non zero value, deactivate with <0
MemProfileRate int

// EnableStartTimestamp adds start label to each counter with the value of counter creation
// timestamp as unix nanoseconds.
EnableStartTimestamp bool

// Flag that enables reporting of the Go garbage collector statistics exported in debug.GCStats
EnableDebugGcMetrics bool

Expand Down Expand Up @@ -1491,6 +1495,7 @@ func run(o Options, sig chan os.Signal, idleConnsCH chan struct{}) error {
mtrOpts := metrics.Options{
Format: metricsKind,
Prefix: o.MetricsPrefix,
EnableStartTimestamp: o.EnableStartTimestamp,
EnableDebugGcMetrics: o.EnableDebugGcMetrics,
EnableRuntimeMetrics: o.EnableRuntimeMetrics,
EnableServeRouteMetrics: o.EnableServeRouteMetrics,
Expand Down

0 comments on commit bdb6068

Please sign in to comment.