Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split labels/series API endpoints in query frontend #3276

Merged
merged 16 commits into from
Oct 13, 2020
Merged
2 changes: 2 additions & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ linters-settings:
exclude: ./.errcheck_excludes.txt
misspell:
locale: US
goconst:
min-occurrences: 5

issues:
exclude-rules:
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
## Unreleased

- [#3261](https://github.com/thanos-io/thanos/pull/3261) Thanos Store: Use segment files specified in meta.json file, if present. If not present, Store does the LIST operation as before.
- [#3276](https://github.com/thanos-io/thanos/pull/3276) Query Frontend: Support query splitting and retry for labels and series requests.

## [v0.16.0](https://github.com/thanos-io/thanos/releases) - Release in progress

Expand Down
72 changes: 47 additions & 25 deletions cmd/thanos/query_frontend.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,33 +42,59 @@ func registerQueryFrontend(app *extkingpin.App) {
cmd := app.Command(comp.String(), "query frontend")
cfg := &queryFrontendConfig{
Config: queryfrontend.Config{
CortexFrontendConfig: &cortexfrontend.Config{},
CortexLimits: &cortexvalidation.Limits{},
CortexResultsCacheConfig: &queryrange.ResultsCacheConfig{},
CortexFrontendConfig: &cortexfrontend.Config{},
QueryRangeConfig: queryfrontend.QueryRangeConfig{
Limits: &cortexvalidation.Limits{},
ResultsCacheConfig: &queryrange.ResultsCacheConfig{},
},
LabelsConfig: queryfrontend.LabelsConfig{
Limits: &cortexvalidation.Limits{},
ResultsCacheConfig: &queryrange.ResultsCacheConfig{},
},
},
}

cfg.http.registerFlag(cmd)

cmd.Flag("query-range.split-interval", "Split queries by an interval and execute in parallel, it should be greater than 0 when response-cache-config is configured.").
Default("24h").DurationVar(&cfg.SplitQueriesByInterval)
// Query range tripperware flags.
cmd.Flag("query-range.split-interval", "Split query range requests by an interval and execute in parallel, it should be greater than 0 when query-range.response-cache-config is configured.").
Default("24h").DurationVar(&cfg.QueryRangeConfig.SplitQueriesByInterval)

cmd.Flag("query-range.max-retries-per-request", "Maximum number of retries for a single request; beyond this, the downstream error is returned.").
Default("5").IntVar(&cfg.MaxRetries)
cmd.Flag("query-range.max-retries-per-request", "Maximum number of retries for a single query range request; beyond this, the downstream error is returned.").
Default("5").IntVar(&cfg.QueryRangeConfig.MaxRetries)

cmd.Flag("query-range.max-query-length", "Limit the query time range (end - start time) in the query-frontend, 0 disables it.").
Default("0").DurationVar(&cfg.CortexLimits.MaxQueryLength)
Default("0").DurationVar(&cfg.QueryRangeConfig.Limits.MaxQueryLength)

cmd.Flag("query-range.max-query-parallelism", "Maximum number of queries will be scheduled in parallel by the Frontend.").
Default("14").IntVar(&cfg.CortexLimits.MaxQueryParallelism)
cmd.Flag("query-range.max-query-parallelism", "Maximum number of query range requests will be scheduled in parallel by the Frontend.").
Default("14").IntVar(&cfg.QueryRangeConfig.Limits.MaxQueryParallelism)

cmd.Flag("query-range.response-cache-max-freshness", "Most recent allowed cacheable result, to prevent caching very recent results that might still be in flux.").
Default("1m").DurationVar(&cfg.CortexLimits.MaxCacheFreshness)
cmd.Flag("query-range.response-cache-max-freshness", "Most recent allowed cacheable result for query range requests, to prevent caching very recent results that might still be in flux.").
Default("1m").DurationVar(&cfg.QueryRangeConfig.Limits.MaxCacheFreshness)

cmd.Flag("query-range.partial-response", "Enable partial response for queries if no partial_response param is specified. --no-query-range.partial-response for disabling.").
Default("true").BoolVar(&cfg.PartialResponseStrategy)
cmd.Flag("query-range.partial-response", "Enable partial response for query range requests if no partial_response param is specified. --no-query-range.partial-response for disabling.").
Default("true").BoolVar(&cfg.QueryRangeConfig.PartialResponseStrategy)

cfg.CachePathOrContent = *extflag.RegisterPathOrContent(cmd, "query-range.response-cache-config", "YAML file that contains response cache configuration.", false)
cfg.QueryRangeConfig.CachePathOrContent = *extflag.RegisterPathOrContent(cmd, "query-range.response-cache-config", "YAML file that contains response cache configuration.", false)

// Labels tripperware flags.
cmd.Flag("labels.split-interval", "Split labels requests by an interval and execute in parallel, it should be greater than 0 when labels.response-cache-config is configured.").
Default("24h").DurationVar(&cfg.LabelsConfig.SplitQueriesByInterval)

cmd.Flag("labels.max-retries-per-request", "Maximum number of retries for a single label/series API request; beyond this, the downstream error is returned.").
Default("5").IntVar(&cfg.LabelsConfig.MaxRetries)

cmd.Flag("labels.max-query-parallelism", "Maximum number of labels requests will be scheduled in parallel by the Frontend.").
Default("14").IntVar(&cfg.LabelsConfig.Limits.MaxQueryParallelism)

cmd.Flag("labels.response-cache-max-freshness", "Most recent allowed cacheable result for labels requests, to prevent caching very recent results that might still be in flux.").
Default("1m").DurationVar(&cfg.LabelsConfig.Limits.MaxCacheFreshness)

cmd.Flag("labels.partial-response", "Enable partial response for labels requests if no partial_response param is specified. --no-labels.partial-response for disabling.").
Default("true").BoolVar(&cfg.LabelsConfig.PartialResponseStrategy)

cmd.Flag("labels.default-time-range", "The default metadata time range duration for retrieving labels through Labels and Series API when the range parameters are not specified.").
Default("24h").DurationVar(&cfg.DefaultTimeRange)

cmd.Flag("cache-compression-type", "Use compression in results cache. Supported values are: 'snappy' and '' (disable compression).").
Default("").StringVar(&cfg.CacheCompression)
Expand Down Expand Up @@ -97,20 +123,16 @@ func runQueryFrontend(
cfg *queryFrontendConfig,
comp component.Component,
) error {
cacheConfContentYaml, err := cfg.CachePathOrContent.Content()
queryRangeCacheConfContentYaml, err := cfg.QueryRangeConfig.CachePathOrContent.Content()
if err != nil {
return err
}
if len(cacheConfContentYaml) > 0 {
cacheConfig, err := queryfrontend.NewCacheConfig(logger, cacheConfContentYaml)
if len(queryRangeCacheConfContentYaml) > 0 {
cacheConfig, err := queryfrontend.NewCacheConfig(logger, queryRangeCacheConfContentYaml)
if err != nil {
return errors.Wrap(err, "initializing the query frontend config")
}
if cfg.CortexResultsCacheConfig.CacheConfig.Memcache.Expiration == 0 {
level.Warn(logger).Log("msg", "memcached cache valid time set to 0, so using a default of 24 hours expiration time")
cfg.CortexResultsCacheConfig.CacheConfig.Memcache.Expiration = 24 * time.Hour
return errors.Wrap(err, "initializing the query range cache config")
}
cfg.CortexResultsCacheConfig = &queryrange.ResultsCacheConfig{
cfg.QueryRangeConfig.ResultsCacheConfig = &queryrange.ResultsCacheConfig{
Compression: cfg.CacheCompression,
CacheConfig: *cacheConfig,
}
Expand All @@ -128,7 +150,7 @@ func runQueryFrontend(

tripperWare, err := queryfrontend.NewTripperware(cfg.Config, reg, logger)
if err != nil {
return errors.Wrap(err, "setup query range middlewares")
return errors.Wrap(err, "setup tripperwares")
}

fe.Wrap(tripperWare)
Expand Down
128 changes: 78 additions & 50 deletions docs/components/query-frontend.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,71 +102,99 @@ usage: thanos query-frontend [<flags>]
query frontend

Flags:
-h, --help Show context-sensitive help (also try --help-long
and --help-man).
--version Show application version.
--log.level=info Log filtering level.
--log.format=logfmt Log format to use. Possible options: logfmt or
json.
-h, --help Show context-sensitive help (also try
--help-long and --help-man).
--version Show application version.
--log.level=info Log filtering level.
--log.format=logfmt Log format to use. Possible options: logfmt or
json.
--tracing.config-file=<file-path>
Path to YAML file with tracing configuration. See
format details:
https://thanos.io/tip/thanos/tracing.md/#configuration
Path to YAML file with tracing configuration.
See format details:
https://thanos.io/tip/thanos/tracing.md/#configuration
--tracing.config=<content>
Alternative to 'tracing.config-file' flag (lower
priority). Content of YAML file with tracing
configuration. See format details:
https://thanos.io/tip/thanos/tracing.md/#configuration
Alternative to 'tracing.config-file' flag
(lower priority). Content of YAML file with
tracing configuration. See format details:
https://thanos.io/tip/thanos/tracing.md/#configuration
--http-address="0.0.0.0:10902"
Listen host:port for HTTP endpoints.
--http-grace-period=2m Time to wait after an interrupt received for HTTP
Server.
Listen host:port for HTTP endpoints.
--http-grace-period=2m Time to wait after an interrupt received for
HTTP Server.
--query-range.split-interval=24h
Split queries by an interval and execute in
parallel, it should be greater than 0 when
response-cache-config is configured.
Split query range requests by an interval and
execute in parallel, it should be greater than
0 when query-range.response-cache-config is
configured.
--query-range.max-retries-per-request=5
Maximum number of retries for a single request;
beyond this, the downstream error is returned.
Maximum number of retries for a single query
range request; beyond this, the downstream
error is returned.
--query-range.max-query-length=0
Limit the query time range (end - start time) in
the query-frontend, 0 disables it.
Limit the query time range (end - start time)
in the query-frontend, 0 disables it.
--query-range.max-query-parallelism=14
Maximum number of queries will be scheduled in
parallel by the Frontend.
Maximum number of query range requests will be
scheduled in parallel by the Frontend.
--query-range.response-cache-max-freshness=1m
Most recent allowed cacheable result, to prevent
caching very recent results that might still be in
flux.
Most recent allowed cacheable result for query
range requests, to prevent caching very recent
results that might still be in flux.
--query-range.partial-response
Enable partial response for queries if no
partial_response param is specified.
--no-query-range.partial-response for disabling.
Enable partial response for query range
requests if no partial_response param is
specified. --no-query-range.partial-response
for disabling.
--query-range.response-cache-config-file=<file-path>
Path to YAML file that contains response cache
configuration.
Path to YAML file that contains response cache
configuration.
--query-range.response-cache-config=<content>
Alternative to
'query-range.response-cache-config-file' flag
(lower priority). Content of YAML file that
contains response cache configuration.
Alternative to
'query-range.response-cache-config-file' flag
(lower priority). Content of YAML file that
contains response cache configuration.
--labels.split-interval=24h
Split labels requests by an interval and
execute in parallel, it should be greater than
0 when labels.response-cache-config is
configured.
--labels.max-retries-per-request=5
Maximum number of retries for a single
label/series API request; beyond this, the
downstream error is returned.
--labels.max-query-parallelism=14
Maximum number of labels requests will be
scheduled in parallel by the Frontend.
--labels.response-cache-max-freshness=1m
Most recent allowed cacheable result for labels
requests, to prevent caching very recent
results that might still be in flux.
--labels.partial-response Enable partial response for labels requests if
no partial_response param is specified.
--no-labels.partial-response for disabling.
--labels.default-time-range=24h
The default metadata time range duration for
retrieving labels through Labels and Series API
when the range parameters are not specified.
--cache-compression-type=""
Use compression in results cache. Supported values
are: 'snappy' and '' (disable compression).
Use compression in results cache. Supported
values are: 'snappy' and '' (disable
compression).
--query-frontend.downstream-url="http://localhost:9090"
URL of downstream Prometheus Query compatible API.
URL of downstream Prometheus Query compatible
API.
--query-frontend.compress-responses
Compress HTTP responses.
Compress HTTP responses.
--query-frontend.log-queries-longer-than=0
Log queries that are slower than the specified
duration. Set to 0 to disable. Set to < 0 to
enable on all queries.
Log queries that are slower than the specified
duration. Set to 0 to disable. Set to < 0 to
enable on all queries.
--log.request.decision=LogFinishCall
Request Logging for logging the start and end of
requests. LogFinishCall is enabled by default.
LogFinishCall : Logs the finish call of the
requests. LogStartAndFinishCall : Logs the start
and finish call of the requests. NoLogCall :
Disable request logging.
Request Logging for logging the start and end
of requests. LogFinishCall is enabled by
default. LogFinishCall : Logs the finish call
of the requests. LogStartAndFinishCall : Logs
the start and finish call of the requests.
NoLogCall : Disable request logging.

```
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ require (
github.com/opentracing/opentracing-go v1.2.0
github.com/pkg/errors v0.9.1
github.com/pmezard/go-difflib v1.0.0
github.com/prometheus/alertmanager v0.21.0
github.com/prometheus/alertmanager v0.21.1-0.20200911160112-1fdff6b3f939
github.com/prometheus/client_golang v1.7.1
github.com/prometheus/client_model v0.2.0
github.com/prometheus/common v0.13.0
github.com/prometheus/prometheus v1.8.2-0.20200819132913-cb830b0a9c78
github.com/prometheus/common v0.14.0
github.com/prometheus/prometheus v1.8.2-0.20200923143134-7e2db3d092f3
github.com/uber/jaeger-client-go v2.25.0+incompatible
github.com/uber/jaeger-lib v2.2.0+incompatible
github.com/weaveworks/common v0.0.0-20200914083218-61ffdd448099
Expand Down
7 changes: 5 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -865,8 +865,9 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN
github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s=
github.com/prometheus/alertmanager v0.19.0/go.mod h1:Eyp94Yi/T+kdeb2qvq66E3RGuph5T/jm/RBVh4yz1xo=
github.com/prometheus/alertmanager v0.21.0 h1:qK51JcUR9l/unhawGA9F9B64OCYfcGewhPNprem/Acc=
github.com/prometheus/alertmanager v0.21.0/go.mod h1:h7tJ81NA0VLWvWEayi1QltevFkLF3KxmC/malTcT8Go=
github.com/prometheus/alertmanager v0.21.1-0.20200911160112-1fdff6b3f939 h1:/gGoc4W45469qMuGGEMArYEs8wsk31/5oE56NUGjEN0=
github.com/prometheus/alertmanager v0.21.1-0.20200911160112-1fdff6b3f939/go.mod h1:imXRHOP6QTsE0fFsIsAV/cXimS32m7gVZOiUj11m6Ig=
github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v0.9.2/go.mod h1:OsXs2jCmiKlQ1lTBmv21f2mNfw4xf/QclQDMrYNZzcM=
Expand Down Expand Up @@ -898,8 +899,10 @@ github.com/prometheus/common v0.8.0/go.mod h1:PC/OgXc+UN7B4ALwvn1yzVZmVwvhXp5Jsb
github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4=
github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
github.com/prometheus/common v0.11.1/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s=
github.com/prometheus/common v0.13.0 h1:vJlpe9wPgDRM1Z+7Wj3zUUjY1nr6/1jNKyl7llliccg=
github.com/prometheus/common v0.12.0/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s=
github.com/prometheus/common v0.13.0/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s=
github.com/prometheus/common v0.14.0 h1:RHRyE8UocrbjU+6UvRzwi6HjiDfxrrBU91TtbKzkGp4=
github.com/prometheus/common v0.14.0/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s=
github.com/prometheus/node_exporter v1.0.0-rc.0.0.20200428091818-01054558c289 h1:dTUS1vaLWq+Y6XKOTnrFpoVsQKLCbCp1OLj24TDi7oM=
github.com/prometheus/node_exporter v1.0.0-rc.0.0.20200428091818-01054558c289/go.mod h1:FGbBv5OPKjch+jNUJmEQpMZytIdyW0NdBtWFcfSKusc=
github.com/prometheus/procfs v0.0.0-20180612222113-7d6f385de8be/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
Expand Down
Loading