Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add HTTP metrics for in-flight requests #5440

Merged
merged 9 commits into from
Jul 1, 2022
18 changes: 10 additions & 8 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,28 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re

### Added

- [#5440](https://github.com/thanos-io/thanos/pull/5440) HTTP metrics: export number of in-flight HTTP requests.

### Changed

### Removed

## [v0.27.0-rc0](https://github.com/thanos-io/thanos/tree/release-0.27) - Release in progress

### Fixed
- [#5339](https://github.com/thanos-io/thanos/pull/5339) Receive: Fix deadlock on interrupt in routerOnly mode
- [#5357](https://github.com/thanos-io/thanos/pull/5357) Store: fix groupcache handling of slashes
- [#5427](https://github.com/thanos-io/thanos/pull/5427) Receive: Fix Ketama hashring replication consistency
- [#5339](https://github.com/thanos-io/thanos/pull/5339) Receive: Fix deadlock on interrupt in routerOnly mode.
- [#5357](https://github.com/thanos-io/thanos/pull/5357) Store: fix groupcache handling of slashes.
- [#5427](https://github.com/thanos-io/thanos/pull/5427) Receive: Fix Ketama hashring replication consistency.

### Added

- [#5337](https://github.com/thanos-io/thanos/pull/5337) Thanos Object Store: Add the `prefix` option to buckets
- [#5337](https://github.com/thanos-io/thanos/pull/5337) Thanos Object Store: Add the `prefix` option to buckets.
- [#5409](https://github.com/thanos-io/thanos/pull/5409) S3: Add option to force DNS style lookup.
- [#5352](https://github.com/thanos-io/thanos/pull/5352) Cache: Add cache metrics to groupcache.
- [#5391](https://github.com/thanos-io/thanos/pull/5391) Receive: Add relabeling support.
- [#5408](https://github.com/thanos-io/thanos/pull/5391) Receive: Add support for consistent hashrings.
- [#5408](https://github.com/thanos-io/thanos/pull/5408) Receive: Add support for consistent hashrings.
- [#5391](https://github.com/thanos-io/thanos/pull/5391) Receive: Implement api/v1/status/tsdb.
- [#5424](https://github.com/thanos-io/thanos/pull/5424) Receive: export metrics regarding size of remote write requests
- [#5424](https://github.com/thanos-io/thanos/pull/5424) Receive: export metrics regarding size of remote write requests.
- [#5420](https://github.com/thanos-io/thanos/pull/5420) Receive: Automatically remove stale tenants.

### Changed
Expand All @@ -50,13 +52,13 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
### Fixed
- [#5281](https://github.com/thanos-io/thanos/pull/5281) Blocks: Use correct separators for filesystem paths and object storage paths respectively.
- [#5300](https://github.com/thanos-io/thanos/pull/5300) Query: Ignore cache on queries with deduplication off.
- [#5324](https://github.com/thanos-io/thanos/pull/5324) Reloader: Force trigger reload when config rollbacked
- [#5324](https://github.com/thanos-io/thanos/pull/5324) Reloader: Force trigger reload when config rollbacked.

### Added

- [#5220](https://github.com/thanos-io/thanos/pull/5220) Query Frontend: Add `--query-frontend.forward-header` flag, forward headers to downstream querier.
- [#5250](https://github.com/thanos-io/thanos/pull/5250/files) Querier: Expose Query and QueryRange APIs through GRPC.
- [#5290](https://github.com/thanos-io/thanos/pull/5290) Add support for [ppc64le](https://en.wikipedia.org/wiki/Ppc64)
- [#5290](https://github.com/thanos-io/thanos/pull/5290) Add support for [ppc64le](https://en.wikipedia.org/wiki/Ppc64).

### Changed

Expand Down
76 changes: 42 additions & 34 deletions pkg/extprom/http/instrument_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,34 +61,37 @@ func (ins *defaultInstrumentationMiddleware) NewHandler(handlerName string, hand
func httpInstrumentationHandler(baseLabels prometheus.Labels, metrics *defaultMetrics, next http.Handler) http.HandlerFunc {
return promhttp.InstrumentHandlerRequestSize(
metrics.requestSize.MustCurryWith(baseLabels),
promhttp.InstrumentHandlerCounter(
metrics.requestsTotal.MustCurryWith(baseLabels),
promhttp.InstrumentHandlerResponseSize(
metrics.responseSize.MustCurryWith(baseLabels),
http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
now := time.Now()

wd := &responseWriterDelegator{w: w}
next.ServeHTTP(wd, r)

requestLabels := prometheus.Labels{"code": wd.Status(), "method": strings.ToLower(r.Method)}
observer := metrics.requestDuration.MustCurryWith(baseLabels).With(requestLabels)
observer.Observe(time.Since(now).Seconds())

// If we find a tracingID we'll expose it as Exemplar.
span := opentracing.SpanFromContext(r.Context())
if span != nil {
spanCtx, ok := span.Context().(jaeger.SpanContext)
if ok && spanCtx.IsSampled() {
observer.(prometheus.ExemplarObserver).ObserveWithExemplar(
time.Since(now).Seconds(),
prometheus.Labels{
"traceID": spanCtx.TraceID().String(),
},
)
instrumentHandlerInFlight(
metrics.inflightHTTPRequests.MustCurryWith(baseLabels),
promhttp.InstrumentHandlerCounter(
metrics.requestsTotal.MustCurryWith(baseLabels),
promhttp.InstrumentHandlerResponseSize(
metrics.responseSize.MustCurryWith(baseLabels),
http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
now := time.Now()

wd := &responseWriterDelegator{w: w}
next.ServeHTTP(wd, r)

requestLabels := prometheus.Labels{"code": wd.Status(), "method": strings.ToLower(r.Method)}
observer := metrics.requestDuration.MustCurryWith(baseLabels).With(requestLabels)
observer.Observe(time.Since(now).Seconds())

// If we find a tracingID we'll expose it as Exemplar.
span := opentracing.SpanFromContext(r.Context())
if span != nil {
spanCtx, ok := span.Context().(jaeger.SpanContext)
if ok && spanCtx.IsSampled() {
observer.(prometheus.ExemplarObserver).ObserveWithExemplar(
time.Since(now).Seconds(),
prometheus.Labels{
"traceID": spanCtx.TraceID().String(),
},
)
}
}
}
}),
}),
),
),
),
)
Expand Down Expand Up @@ -126,11 +129,16 @@ func (wd *responseWriterDelegator) Status() string {
return fmt.Sprintf("%d", wd.StatusCode())
}

// NewInstrumentHandlerInflightTenant creates a middleware used to export the current amount of concurrent requests
// being handled. It has an optional tenant label whenever a tenant is present in the context.
func NewInstrumentHandlerInflightTenant(gaugeVec *prometheus.GaugeVec, tenantHeader string, next http.HandlerFunc) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
tenant := r.Header.Get(tenantHeader)
promhttp.InstrumentHandlerInFlight(gaugeVec.With(prometheus.Labels{"tenant": tenant}), next).ServeHTTP(w, r)
}
// instrumentHandlerInFlight is responsible for counting the amount of
// in-flight HTTP requests (requests being processed by the handler) at a given
// moment in time.
// This is used instead of prometheus/client_golang/promhttp.InstrumentHandlerInFlight
// to be able to have the HTTP method as a label.
func instrumentHandlerInFlight(vec *prometheus.GaugeVec, next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
gauge := vec.With(prometheus.Labels{"method": r.Method})
gauge.Inc()
defer gauge.Dec()
next.ServeHTTP(w, r)
})
}
20 changes: 16 additions & 4 deletions pkg/extprom/http/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ import (
)

type defaultMetrics struct {
requestDuration *prometheus.HistogramVec
requestSize *prometheus.SummaryVec
requestsTotal *prometheus.CounterVec
responseSize *prometheus.SummaryVec
requestDuration *prometheus.HistogramVec
requestSize *prometheus.SummaryVec
requestsTotal *prometheus.CounterVec
responseSize *prometheus.SummaryVec
inflightHTTPRequests *prometheus.GaugeVec
}

func newDefaultMetrics(reg prometheus.Registerer, buckets []float64, extraLabels []string) *defaultMetrics {
Expand All @@ -29,26 +30,37 @@ func newDefaultMetrics(reg prometheus.Registerer, buckets []float64, extraLabels
},
append([]string{"code", "handler", "method"}, extraLabels...),
),

requestSize: promauto.With(reg).NewSummaryVec(
prometheus.SummaryOpts{
Name: "http_request_size_bytes",
Help: "Tracks the size of HTTP requests.",
},
append([]string{"code", "handler", "method"}, extraLabels...),
),

requestsTotal: promauto.With(reg).NewCounterVec(
prometheus.CounterOpts{
Name: "http_requests_total",
Help: "Tracks the number of HTTP requests.",
},
append([]string{"code", "handler", "method"}, extraLabels...),
),

responseSize: promauto.With(reg).NewSummaryVec(
prometheus.SummaryOpts{
Name: "http_response_size_bytes",
Help: "Tracks the size of HTTP responses.",
},
append([]string{"code", "handler", "method"}, extraLabels...),
),

inflightHTTPRequests: promauto.With(reg).NewGaugeVec(
prometheus.GaugeOpts{
Name: "http_inflight_requests",
Help: "Current number of HTTP requests the handler is responding to.",
},
append([]string{"handler", "method"}, extraLabels...),
),
}
}