Skip to content

Commit

Permalink
Consolidate backend metrics and add hedged request metric (#790)
Browse files Browse the repository at this point in the history
* Consolidate backend metrics, add hedged metric

* Update changelog

* Add deprecated message to older metrics
  • Loading branch information
josephwoodward committed Jun 30, 2021
1 parent 3efc8ca commit f542976
Show file tree
Hide file tree
Showing 13 changed files with 273 additions and 144 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
## main / unreleased

* [ENHANCEMENT] Added hedged request metric `tempodb_backend_hedged_roundtrips_total` and a new storage-agnostic `tempodb_backend_request_duration_seconds` metric that
supersedes the soon-to-be deprecated storage-specific metrics (`tempodb_azure_request_duration_seconds`, `tempodb_s3_request_duration_seconds` and `tempodb_gcs_request_duration_seconds`). [#790](https://github.com/grafana/tempo/pull/790) (@JosephWoodward)
* [CHANGE] Jsonnet: use dedicated configmaps for distributors and ingesters [#775](https://github.com/grafana/tempo/pull/775) (@kvrhdn)
* [FEATURE] Added the ability to hedge requests with all backends [#750](https://github.com/grafana/tempo/pull/750) (@joe-elliott)
* [ENHANCEMENT] Performance: improve compaction speed with concurrent reads and writes [#754](https://github.com/grafana/tempo/pull/754) (@mdisibio)
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ require (
github.com/alecthomas/kong v0.2.11
github.com/cespare/xxhash v1.1.0
github.com/cortexproject/cortex v1.8.1-0.20210422151339-cf1c444e0905
github.com/cristalhq/hedgedhttp v0.4.0
github.com/cristalhq/hedgedhttp v0.6.0
github.com/drone/envsubst v1.0.3
github.com/dustin/go-humanize v1.0.0
github.com/go-kit/kit v0.10.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -313,8 +313,8 @@ github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsr
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/cristalhq/hedgedhttp v0.4.0 h1:J1z1zKJ1bEFpMLjZWgtX0inUWlWecNyouWRIQknGzgM=
github.com/cristalhq/hedgedhttp v0.4.0/go.mod h1:XkqWU6qVMutbhW68NnzjWrGtH8NUx1UfYqGYtHVKIsI=
github.com/cristalhq/hedgedhttp v0.6.0 h1:32REZ0SZ1q0xoRNpHP5ab+Qd3VseyURXjN3HFpBqTiE=
github.com/cristalhq/hedgedhttp v0.6.0/go.mod h1:XkqWU6qVMutbhW68NnzjWrGtH8NUx1UfYqGYtHVKIsI=
github.com/crossdock/crossdock-go v0.0.0-20160816171116-049aabb0122b/go.mod h1:v9FBN7gdVTpiD/+LZ7Po0UKvROyT87uLVxTHVky/dlQ=
github.com/cznic/b v0.0.0-20180115125044-35e9bbe41f07/go.mod h1:URriBxXwVq5ijiJ12C7iIZqlA69nTlI+LgI6/pwftG8=
github.com/cznic/fileutil v0.0.0-20180108211300-6a051e75936f/go.mod h1:8S58EK26zhXSxzv7NQFpnliaOQsmDUxvoQO3rt154Vg=
Expand Down
8 changes: 6 additions & 2 deletions tempodb/backend/azure/azure_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"strings"
"time"

"github.com/grafana/tempo/tempodb/backend/instrumentation"

"github.com/Azure/azure-pipeline-go/pipeline"
blob "github.com/Azure/azure-storage-blob-go/azblob"
"github.com/cristalhq/hedgedhttp"
Expand Down Expand Up @@ -35,11 +37,13 @@ func GetContainerURL(ctx context.Context, cfg *Config, hedge bool) (blob.Contain
customTransport := http.DefaultTransport.(*http.Transport).Clone()

// add instrumentation
transport := newInstrumentedTransport(customTransport)
transport := instrumentation.NewAzureTransport(customTransport)
var stats *hedgedhttp.Stats

// hedge if desired (0 means disabled)
if hedge && cfg.HedgeRequestsAt != 0 {
transport = hedgedhttp.NewRoundTripper(cfg.HedgeRequestsAt, uptoHedgedRequests, transport)
transport, stats = hedgedhttp.NewRoundTripperAndStats(cfg.HedgeRequestsAt, uptoHedgedRequests, transport)
instrumentation.PublishHedgedMetrics(stats)
}

client := http.Client{Transport: transport}
Expand Down
41 changes: 0 additions & 41 deletions tempodb/backend/azure/instrumentation.go

This file was deleted.

8 changes: 6 additions & 2 deletions tempodb/backend/gcs/gcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (
"strings"
"time"

"github.com/grafana/tempo/tempodb/backend/instrumentation"

"cloud.google.com/go/storage"
"github.com/cristalhq/hedgedhttp"
"github.com/google/uuid"
Expand Down Expand Up @@ -319,11 +321,13 @@ func createBucket(ctx context.Context, cfg *Config, hedge bool) (*storage.Bucket
}

// add instrumentation
transport = newInstrumentedTransport(transport)
transport = instrumentation.NewGCSTransport(transport)
var stats *hedgedhttp.Stats

// hedge if desired (0 means disabled)
if hedge && cfg.HedgeRequestsAt != 0 {
transport = hedgedhttp.NewRoundTripper(cfg.HedgeRequestsAt, uptoHedgedRequests, transport)
transport, stats = hedgedhttp.NewRoundTripperAndStats(cfg.HedgeRequestsAt, uptoHedgedRequests, transport)
instrumentation.PublishHedgedMetrics(stats)
}

// build client
Expand Down
43 changes: 0 additions & 43 deletions tempodb/backend/gcs/instrumentation.go

This file was deleted.

80 changes: 80 additions & 0 deletions tempodb/backend/instrumentation/backend_transports.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package instrumentation

import (
"net/http"
"strconv"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

// Request-duration histograms for the object-storage backends.
//
// All four histograms share the same label set ("operation", "status_code") and
// the same exponential bucket layout: 6 buckets starting at 5ms and growing by a
// factor of 4 (0.005s, 0.02s, 0.08s, 0.32s, 1.28s, 5.12s).
//
// The three storage-specific histograms are kept only for backwards
// compatibility; their help text marks them as deprecated in favour of
// tempodb_backend_request_duration_seconds.
var (
// gcsRequestDuration is the deprecated GCS-specific request histogram
// (tempodb_gcs_request_duration_seconds).
gcsRequestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "tempodb",
Name: "gcs_request_duration_seconds",
Help: "Time spent doing GCS requests. (DEPRECATED: See tempodb_backend_request_duration_seconds)",
Buckets: prometheus.ExponentialBuckets(0.005, 4, 6),
}, []string{"operation", "status_code"})

// s3RequestDuration is the deprecated S3-specific request histogram
// (tempodb_s3_request_duration_seconds).
s3RequestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "tempodb",
Name: "s3_request_duration_seconds",
Help: "Time spent doing AWS S3 requests. (DEPRECATED: See tempodb_backend_request_duration_seconds)",
Buckets: prometheus.ExponentialBuckets(0.005, 4, 6),
}, []string{"operation", "status_code"})

// azureRequestDuration is the deprecated Azure-specific request histogram
// (tempodb_azure_request_duration_seconds).
azureRequestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "tempodb",
Name: "azure_request_duration_seconds",
Help: "Time spent doing Azure requests. (DEPRECATED: See tempodb_backend_request_duration_seconds)",
Buckets: prometheus.ExponentialBuckets(0.005, 4, 6),
}, []string{"operation", "status_code"})

// requestDuration is the storage-agnostic request histogram
// (tempodb_backend_request_duration_seconds) that every backend transport
// in this file observes into.
requestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "tempodb",
Name: "backend_request_duration_seconds",
Help: "Time spent doing backend storage requests.",
Buckets: prometheus.ExponentialBuckets(0.005, 4, 6),
}, []string{"operation", "status_code"})
)

// instrumentedTransport decorates an http.RoundTripper, timing every
// successful round trip and reporting it to two histograms.
type instrumentedTransport struct {
	// next is the wrapped transport that actually performs the request.
	next http.RoundTripper

	// observer receives the consolidated, storage-agnostic measurement;
	// legacyObserver receives the same measurement under the older
	// storage-specific metric name.
	observer, legacyObserver prometheus.ObserverVec
}

// NewGCSTransport wraps next in a transport that records request durations
// to the consolidated backend histogram as well as the legacy GCS histogram.
func NewGCSTransport(next http.RoundTripper) http.RoundTripper {
	t := instrumentedTransport{
		legacyObserver: gcsRequestDuration,
		observer:       requestDuration,
	}
	t.next = next
	return t
}

// NewS3Transport wraps next in a transport that records request durations
// to the consolidated backend histogram as well as the legacy S3 histogram.
func NewS3Transport(next http.RoundTripper) http.RoundTripper {
	t := instrumentedTransport{
		legacyObserver: s3RequestDuration,
		observer:       requestDuration,
	}
	t.next = next
	return t
}

// NewAzureTransport wraps next in a transport that records request durations
// to the consolidated backend histogram as well as the legacy Azure histogram.
func NewAzureTransport(next http.RoundTripper) http.RoundTripper {
	t := instrumentedTransport{
		legacyObserver: azureRequestDuration,
		observer:       requestDuration,
	}
	t.next = next
	return t
}

// RoundTrip implements http.RoundTripper. It forwards req to the wrapped
// transport and, when a response comes back, records the elapsed time in both
// the legacy storage-specific histogram and the consolidated backend
// histogram, labelled with the HTTP method and the response status code.
//
// Round trips that return an error are passed through unobserved: there is no
// status code to label them with.
func (i instrumentedTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	start := time.Now()
	resp, err := i.next.RoundTrip(req)
	if err != nil {
		return resp, err
	}

	// Measure and format once so both histograms record exactly the same
	// duration and label values for this request.
	elapsed := time.Since(start).Seconds()
	statusCode := strconv.Itoa(resp.StatusCode)

	i.legacyObserver.WithLabelValues(req.Method, statusCode).Observe(elapsed)
	i.observer.WithLabelValues(req.Method, statusCode).Observe(elapsed)
	return resp, nil
}
33 changes: 33 additions & 0 deletions tempodb/backend/instrumentation/hedged_requests.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package instrumentation

import (
"time"

"github.com/cristalhq/hedgedhttp"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

const (
// hedgedMetricsPublishDuration is the interval at which PublishHedgedMetrics
// reads the hedged transport statistics and updates the counter below.
hedgedMetricsPublishDuration = 10 * time.Second
)

var (
// hedgedRequestsMetrics is the tempodb_backend_hedged_roundtrips_total
// counter. It is shared by every backend; PublishHedgedMetrics is the only
// writer visible in this file.
hedgedRequestsMetrics = promauto.NewCounter(
prometheus.CounterOpts{
Namespace: "tempodb",
Name: "backend_hedged_roundtrips_total",
Help: "Total number of hedged backend requests",
},
)
)

// PublishHedgedMetrics starts a background goroutine that, once every
// hedgedMetricsPublishDuration, reads the round-trip statistics of a hedged
// transport and publishes them to the hedged round-trips counter.
//
// The statistics snapshot is a running total, so only the growth since the
// previous tick is added to the counter; adding the full snapshot on every
// tick would re-count earlier requests and inflate the metric.
//
// A nil s is ignored. The goroutine and its ticker run for the lifetime of the
// process; there is no way to stop them through this API.
//
// NOTE(review): RequestedRoundTrips looks like it counts every request sent
// through the hedged transport, not only the extra hedged attempts. Confirm
// that this is the quantity the metric is meant to report.
func PublishHedgedMetrics(s *hedgedhttp.Stats) {
	if s == nil {
		return
	}

	ticker := time.NewTicker(hedgedMetricsPublishDuration)
	go func() {
		// published is the running total already reflected in the counter.
		var published float64
		for range ticker.C {
			total := float64(s.Snapshot().RequestedRoundTrips)
			if delta := total - published; delta > 0 {
				hedgedRequestsMetrics.Add(delta)
				published = total
			}
		}
	}()
}
41 changes: 0 additions & 41 deletions tempodb/backend/s3/instrumentation.go

This file was deleted.

8 changes: 6 additions & 2 deletions tempodb/backend/s3/s3.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import (
"net/http"
"strings"

"github.com/grafana/tempo/tempodb/backend/instrumentation"

log_util "github.com/cortexproject/cortex/pkg/util/log"
"github.com/cristalhq/hedgedhttp"
"github.com/go-kit/kit/log"
Expand Down Expand Up @@ -400,10 +402,12 @@ func createCore(cfg *Config, hedge bool) (*minio.Core, error) {
}

// add instrumentation
transport := newInstrumentedTransport(customTransport)
transport := instrumentation.NewS3Transport(customTransport)
var stats *hedgedhttp.Stats

if hedge && cfg.HedgeRequestsAt != 0 {
transport = hedgedhttp.NewRoundTripper(cfg.HedgeRequestsAt, uptoHedgedRequests, transport)
transport, stats = hedgedhttp.NewRoundTripperAndStats(cfg.HedgeRequestsAt, uptoHedgedRequests, transport)
instrumentation.PublishHedgedMetrics(stats)
}

opts := &minio.Options{
Expand Down
Loading

0 comments on commit f542976

Please sign in to comment.