diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a3349c5ac..59015a11f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#4506](https://github.com/thanos-io/thanos/pull/4506) `Baidu BOS` object storage, see [documents](docs/storage.md#baidu-bos) for further information. - [#4552](https://github.com/thanos-io/thanos/pull/4552) Compact: Adds `thanos_compact_downsample_duration_seconds` histogram. - [#4594](https://github.com/thanos-io/thanos/pull/4594) reloader: Expose metrics in config reloader to give info on the last operation. +- [#4623](https://github.com/thanos-io/thanos/pull/4623) query-frontend: made HTTP downstream tripper (client) configurable via parameters `--query-frontend.downstream-tripper-config` and `--query-frontend.downstream-tripper-config-file`. If your downstream URL is localhost or 127.0.0.1 then it is strongly recommended to bump `max_idle_conns_per_host` to at least 100 so that `query-frontend` can properly use HTTP keep-alive connections and thus reduce the latency of `query-frontend` by about 20%. ### Fixed diff --git a/cmd/thanos/query_frontend.go b/cmd/thanos/query_frontend.go index fdbae44066..51fdabcb89 100644 --- a/cmd/thanos/query_frontend.go +++ b/cmd/thanos/query_frontend.go @@ -19,10 +19,12 @@ import ( "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/weaveworks/common/user" + "gopkg.in/yaml.v2" extflag "github.com/efficientgo/tools/extkingpin" "github.com/thanos-io/thanos/pkg/api" "github.com/thanos-io/thanos/pkg/component" + "github.com/thanos-io/thanos/pkg/exthttp" "github.com/thanos-io/thanos/pkg/extkingpin" "github.com/thanos-io/thanos/pkg/extprom" extpromhttp "github.com/thanos-io/thanos/pkg/extprom/http" @@ -118,6 +120,8 @@ func registerQueryFrontend(app *extkingpin.App) { cmd.Flag("query-frontend.downstream-url", "URL of downstream Prometheus Query compatible API."). 
Default("http://localhost:9090").StringVar(&cfg.DownstreamURL) + cfg.DownstreamTripperConfig.CachePathOrContent = *extflag.RegisterPathOrContent(cmd, "query-frontend.downstream-tripper-config", "YAML file that contains downstream tripper configuration. If your downstream URL is localhost or 127.0.0.1 then it is highly recommended to increase max_idle_conns_per_host to at least 100.", extflag.WithEnvSubstitution()) + cmd.Flag("query-frontend.compress-responses", "Compress HTTP responses."). Default("false").BoolVar(&cfg.CompressResponses) @@ -142,6 +146,41 @@ func registerQueryFrontend(app *extkingpin.App) { }) } +func parseTransportConfiguration(downstreamTripperConfContentYaml []byte) (*http.Transport, error) { + downstreamTripper := exthttp.NewTransport() + + if len(downstreamTripperConfContentYaml) > 0 { + tripperConfig := &queryfrontend.DownstreamTripperConfig{} + if err := yaml.UnmarshalStrict(downstreamTripperConfContentYaml, tripperConfig); err != nil { + return nil, errors.Wrap(err, "parsing downstream tripper config YAML file") + } + + if tripperConfig.IdleConnTimeout > 0 { + downstreamTripper.IdleConnTimeout = time.Duration(tripperConfig.IdleConnTimeout) + } + if tripperConfig.ResponseHeaderTimeout > 0 { + downstreamTripper.ResponseHeaderTimeout = time.Duration(tripperConfig.ResponseHeaderTimeout) + } + if tripperConfig.TLSHandshakeTimeout > 0 { + downstreamTripper.TLSHandshakeTimeout = time.Duration(tripperConfig.TLSHandshakeTimeout) + } + if tripperConfig.ExpectContinueTimeout > 0 { + downstreamTripper.ExpectContinueTimeout = time.Duration(tripperConfig.ExpectContinueTimeout) + } + if tripperConfig.MaxIdleConns != nil { + downstreamTripper.MaxIdleConns = *tripperConfig.MaxIdleConns + } + if tripperConfig.MaxIdleConnsPerHost != nil { + downstreamTripper.MaxIdleConnsPerHost = *tripperConfig.MaxIdleConnsPerHost + } + if tripperConfig.MaxConnsPerHost != nil { + downstreamTripper.MaxConnsPerHost = *tripperConfig.MaxConnsPerHost + } + } + + return 
downstreamTripper, nil +} + func runQueryFrontend( g *run.Group, logger log.Logger, @@ -191,7 +230,16 @@ func runQueryFrontend( } // Create a downstream roundtripper. - roundTripper, err := cortexfrontend.NewDownstreamRoundTripper(cfg.DownstreamURL, http.DefaultTransport) + downstreamTripperConfContentYaml, err := cfg.DownstreamTripperConfig.CachePathOrContent.Content() + if err != nil { + return err + } + downstreamTripper, err := parseTransportConfiguration(downstreamTripperConfContentYaml) + if err != nil { + return err + } + + roundTripper, err := cortexfrontend.NewDownstreamRoundTripper(cfg.DownstreamURL, downstreamTripper) if err != nil { return errors.Wrap(err, "setup downstream roundtripper") } diff --git a/docs/components/query-frontend.md b/docs/components/query-frontend.md index 3f32247248..196891f3b2 100644 --- a/docs/components/query-frontend.md +++ b/docs/components/query-frontend.md @@ -103,6 +103,22 @@ Query Frontend supports `--query-frontend.log-queries-longer-than` flag to log q Naming is hard :) Please check [here](https://github.com/thanos-io/thanos/pull/2434#discussion_r408300683) to see why we chose `query-frontend` as the name. +## Recommended Downstream Tripper Configuration + +You can configure the parameters of the HTTP client that `query-frontend` uses for the downstream URL with parameters `--query-range.downstream-tripper-config` and `--query-range.downstream-tripper-config-file`. If it is pointing to a single host, most likely a load-balancer, then it is highly recommended to increase `max_idle_conns_per_host` via these parameters to at least 100 because otherwise `query-frontend` will not be able to leverage HTTP keep-alive connections, and the latency will be 10 - 20% higher. By default, the Go HTTP client will only keep two idle connections per each host. + +Keys which denote a duration are strings that can end with `s` or `m` to indicate seconds or minutes respectively. All of the other keys are integers. 
Supported keys are: + +* `idle_conn_timeout` - timeout of idle connections (string); +* `response_header_timeout` - maximum duration to wait for a response header (string); +* `tls_handshake_timeout` - maximum duration of a TLS handshake (string); +* `expect_continue_timeout` - maximum duration to wait for the server's first response headers after fully writing the request headers when the request has an `Expect: 100-continue` header, see [Go source code](https://github.com/golang/go/blob/912f0750472dd4f674b69ca1616bfaf377af1805/src/net/http/transport.go#L220-L226) (string); +* `max_idle_conns` - maximum number of idle connections to all hosts (integer); +* `max_idle_conns_per_host` - maximum number of idle connections to each host (integer); +* `max_conns_per_host` - maximum number of connections to each host (integer). + +You can find the default values [here](https://github.com/thanos-io/thanos/blob/55cb8ca38b3539381dc6a781e637df15c694e50a/pkg/exthttp/transport.go#L12-L27). + ## Flags ```$ mdox-exec="thanos query-frontend --help" @@ -170,6 +186,21 @@ Flags: Disable request logging. --query-frontend.compress-responses Compress HTTP responses. + --query-frontend.downstream-tripper-config= + Alternative to + 'query-frontend.downstream-tripper-config-file' + flag (mutually exclusive). Content of YAML file + that contains downstream tripper configuration. + If your downstream URL is localhost or + 127.0.0.1 then it is highly recommended to + increase max_idle_conns_per_host to at least + 100. + --query-frontend.downstream-tripper-config-file= + Path to YAML file that contains downstream + tripper configuration. If your downstream URL + is localhost or 127.0.0.1 then it is highly + recommended to increase max_idle_conns_per_host + to at least 100. --query-frontend.downstream-url="http://localhost:9090" URL of downstream Prometheus Query compatible API. 
diff --git a/pkg/queryfrontend/config.go b/pkg/queryfrontend/config.go index 9a47ca9178..8dbaff50e9 100644 --- a/pkg/queryfrontend/config.go +++ b/pkg/queryfrontend/config.go @@ -17,6 +17,7 @@ import ( "gopkg.in/yaml.v2" extflag "github.com/efficientgo/tools/extkingpin" + prommodel "github.com/prometheus/common/model" "github.com/thanos-io/thanos/pkg/cacheutil" "github.com/thanos-io/thanos/pkg/model" ) @@ -138,10 +139,24 @@ func NewCacheConfig(logger log.Logger, confContentYaml []byte) (*cortexcache.Con } } +// DownstreamTripperConfig stores the http.Transport configuration for query-frontend's HTTP downstream tripper. +type DownstreamTripperConfig struct { + IdleConnTimeout prommodel.Duration `yaml:"idle_conn_timeout"` + ResponseHeaderTimeout prommodel.Duration `yaml:"response_header_timeout"` + TLSHandshakeTimeout prommodel.Duration `yaml:"tls_handshake_timeout"` + ExpectContinueTimeout prommodel.Duration `yaml:"expect_continue_timeout"` + MaxIdleConns *int `yaml:"max_idle_conns"` + MaxIdleConnsPerHost *int `yaml:"max_idle_conns_per_host"` + MaxConnsPerHost *int `yaml:"max_conns_per_host"` + + CachePathOrContent extflag.PathOrContent +} + // Config holds the query frontend configs. type Config struct { QueryRangeConfig LabelsConfig + DownstreamTripperConfig CortexHandlerConfig *transport.HandlerConfig CompressResponses bool