diff --git a/CHANGELOG.md b/CHANGELOG.md index 64c341911d..ce3b8d53c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#6192](https://github.com/thanos-io/thanos/pull/6192) Store: add flag `bucket-web-label` to select the label to use as timeline title in web UI - [#6167](https://github.com/thanos-io/thanos/pull/6195) Receive: add flag `tsdb.too-far-in-future.time-window` to prevent clock skewed samples to pollute TSDB head and block all valid incoming samples. - [#6273](https://github.com/thanos-io/thanos/pull/6273) Mixin: Allow specifying an instance name filter in dashboards +- [#6163](https://github.com/thanos-io/thanos/pull/6163) Receive: add hidden flag `--receive-forward-max-backoff` to configure the maximum backoff for forwarding requests. ### Fixed @@ -39,6 +40,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#6244](https://github.com/thanos-io/thanos/pull/6244) mixin(Rule): Add rule evaluation failures to the Rule dashboard. - [#6303](https://github.com/thanos-io/thanos/pull/6303) Store: added and start using streamed snappy encoding for postings list instead of block based one. This leads to constant memory usage during decompression. This approximately halves memory usage when decompressing a postings list in index cache. - [#6071](https://github.com/thanos-io/thanos/pull/6071) Query Frontend: *breaking :warning:* Add experimental native histogram support for which we updated and aligned with the [Prometheus common](https://github.com/prometheus/common) model, which is used for caching so a cache reset required. +- [#6163](https://github.com/thanos-io/thanos/pull/6163) Receive: changed the max backoff for forwarding requests from a fixed 30s to a default of 5s. It can be configured with the hidden flag `--receive-forward-max-backoff`. 
### Removed diff --git a/cmd/thanos/receive.go b/cmd/thanos/receive.go index cc01e74a17..2da933f07d 100644 --- a/cmd/thanos/receive.go +++ b/cmd/thanos/receive.go @@ -246,6 +246,7 @@ func runReceive( TLSConfig: rwTLSConfig, DialOpts: dialOpts, ForwardTimeout: time.Duration(*conf.forwardTimeout), + MaxBackoff: time.Duration(*conf.maxBackoff), TSDBStats: dbs, Limiter: limiter, }) @@ -781,6 +782,7 @@ type receiveConfig struct { replicaHeader string replicationFactor uint64 forwardTimeout *model.Duration + maxBackoff *model.Duration compression string tsdbMinBlockDuration *model.Duration @@ -872,6 +874,8 @@ func (rc *receiveConfig) registerFlag(cmd extkingpin.FlagClause) { rc.forwardTimeout = extkingpin.ModelDuration(cmd.Flag("receive-forward-timeout", "Timeout for each forward request.").Default("5s").Hidden()) + rc.maxBackoff = extkingpin.ModelDuration(cmd.Flag("receive-forward-max-backoff", "Maximum backoff for each forward fan-out request").Default("5s").Hidden()) + rc.relabelConfigPath = extflag.RegisterPathOrContent(cmd, "receive.relabel-config", "YAML file that contains relabeling configuration.", extflag.WithEnvSubstitution()) rc.tsdbMinBlockDuration = extkingpin.ModelDuration(cmd.Flag("tsdb.min-block-duration", "Min duration for local TSDB blocks").Default("2h").Hidden()) diff --git a/pkg/receive/handler.go b/pkg/receive/handler.go index d006aad2d3..3cce0552ab 100644 --- a/pkg/receive/handler.go +++ b/pkg/receive/handler.go @@ -98,6 +98,7 @@ type Options struct { TLSConfig *tls.Config DialOpts []grpc.DialOption ForwardTimeout time.Duration + MaxBackoff time.Duration RelabelConfigs []*relabel.Config TSDBStats TSDBStats Limiter *Limiter @@ -148,7 +149,7 @@ func NewHandler(logger log.Logger, o *Options) *Handler { expBackoff: backoff.Backoff{ Factor: 2, Min: 100 * time.Millisecond, - Max: 30 * time.Second, + Max: o.MaxBackoff, Jitter: true, }, Limiter: o.Limiter,