From efcdf5e841872f15e968d81bb56ceb9c4784942f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Mota?= Date: Wed, 2 Mar 2022 11:25:00 +0100 Subject: [PATCH 01/24] fix: add null check to exemplar data (#5202) Signed-off-by: Thomas Mota --- pkg/exemplars/exemplars.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/exemplars/exemplars.go b/pkg/exemplars/exemplars.go index 36b0ce9ffbf..2817d155557 100644 --- a/pkg/exemplars/exemplars.go +++ b/pkg/exemplars/exemplars.go @@ -89,6 +89,10 @@ func (rr *GRPCClient) Exemplars(ctx context.Context, req *exemplarspb.ExemplarsR return nil, nil, errors.Wrap(err, "proxy Exemplars") } + if resp.data == nil { + return make([]*exemplarspb.ExemplarData, 0), resp.warnings, nil + } + resp.data = dedupExemplarsResponse(resp.data, rr.replicaLabels) return resp.data, resp.warnings, nil } From 13b0b379f3a29ed390af40499a781812f406218b Mon Sep 17 00:00:00 2001 From: Matej Gera <38492574+matej-g@users.noreply.github.com> Date: Wed, 2 Mar 2022 11:25:19 +0100 Subject: [PATCH 02/24] Update release process doc (#5192) Signed-off-by: Matej Gera --- docs/release-process.md | 70 +++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 31 deletions(-) diff --git a/docs/release-process.md b/docs/release-process.md index b035948ea43..3e1bc11cdfb 100644 --- a/docs/release-process.md +++ b/docs/release-process.md @@ -58,48 +58,56 @@ Process of releasing a *minor* Thanos version: Release is happening on separate `release-.` branch. -1. Prepare PR to branch `release-.` that will start minor release branch and prepare changes to cut release. +### Prepare the release branch - Push the created branch to origin (Thanos repository) to be able to make your PR with the CHANGELOG.md changes against this branch later. +Prepare branch `release-.` that will start minor release branch and prepare changes to cut release. - ```bash - $ git push origin release-. 
- ``` +Push the created branch to origin (Thanos repository) to be able to make your PR with the CHANGELOG.md changes against this branch later. + +```bash +$ git push origin release-. +``` -For release candidate just reuse same branch and rebase it on every candidate until the actual release happens. +For release candidate, reuse the same branch and rebase it on every candidate until the actual release happens. -1. Create small PR to `main` (!) to cut CHANGELOG. This helps to maintain new changelog on main. +### Indicate that a release is in progress - 1. Add entry to CHANGELOG indicating release in progress. This reduces risk for the new PRs to add changelog entries to already released release. - 2. Update `VERSION` file to version one minor version higher than the released one and `dev` suffix. This allows CI to build thanos binary with the version indicating potential next minor release, showing that someone uses non-released binary (which is fine, just better to know this!). +1. Create small PR to `main` (!) to cut CHANGELOG. This helps to maintain new changelog on main. Add entry to CHANGELOG indicating release in progress. This reduces risk for the new PRs to add changelog entries to already released release. + +2. Update `VERSION` file to version one minor version higher than the released one and `dev` suffix. This allows CI to build Thanos binary with the version indicating potential next minor release, showing that someone uses non-released binary (which is fine, just better to know this!). Feel free to mimic following PR: https://github.com/thanos-io/thanos/pull/3861 -1. Update [CHANGELOG file](../CHANGELOG.md) +### Prepare the release -Note that `CHANGELOG.md` should only document changes relevant to users of Thanos, including external API changes, performance improvements, and new features. Do not document changes of internal interfaces, code refactorings and clean-ups, changes to the build process, etc. 
People interested in these are asked to refer to the git history. Format is described in `CHANGELOG.md`. +1. Create a branch based on the release branch. You will use this branch to include any changes that need to happen as a part of 'cutting' the release. Follow the steps below and commit and resulting changes to this branch. -The whole release from release candidate `rc.0` to actual release should have exactly the same section. We don't separate what have changed between release candidates. +2. Double check and update [CHANGELOG file](../CHANGELOG.md). Note that `CHANGELOG.md` should only document changes relevant to users of Thanos, including external API changes, bug fixes, performance improvements, and new features. Do not document changes of internal interfaces, code refactorings and clean-ups, changes to the build process, etc. People interested in these are asked to refer to the git history. Format is described in `CHANGELOG.md`. + - The whole release from release candidate `rc.0` to actual release should have exactly the same section. We don't separate what have changed between release candidates. -1. Double check backward compatibility: +3. Double check backward compatibility: 1. *In case of version after `v1+.y.z`*, double check if none of the changes break API compatibility. This should be done in PR review process, but double check is good to have. 2. In case of `v0.y.z`, document all incompatibilities in changelog. -2. Double check metric changes: +4. Double check metric changes: 1. Note any changes in the changelog 2. If there were any changes then update the relevant alerting rules and/or dashboards since `thanos-mixin` is part of the repository now -3. Update website's [hugo.yaml](https://github.com/thanos-io/thanos/blob/main/website/hugo.yaml) to have correct links for new release ( add `0.y.z: "/:sections/:filename.md"`). +5. 
*(Applies only to minor, non-`rc` release)* Update website's [hugo.yaml](https://github.com/thanos-io/thanos/blob/main/website/hugo.yaml) to have correct links for new release ( add `0.y.z: "/:sections/:filename.md"`). -4. Update tutorials: +6. *(Applies only to minor, non-`rc` release)* Update tutorials: 1. Update the Thanos version used in the [tutorials](../tutorials) manifests. 2. In case of any breaking changes or necessary updates adjust the manifests so the tutorial stays up to date. 3. Update the [scripts/quickstart.sh](https://github.com/thanos-io/thanos/blob/main/scripts/quickstart.sh) script if needed. -5. After review, merge the PR and immediately after this tag a version: +7. Open a PR with any changes resulting from the previous steps against the release branch and ask the maintainers to review it. + +### Tag and publish the release + +1. After review and obtaining (an) approval(s), merge the PR and after this tag a version: ```bash tag=$(cat VERSION) @@ -111,28 +119,28 @@ The whole release from release candidate `rc.0` to actual release should have ex Please make sure that you are tagging the merge commit because otherwise GitHub's UI will show that there were more commits after your release. -6. Once a tag is created, the release process through CircleCI will be triggered for this tag. +2. Once a tag is created and pushed, **immediately** create a Github Release using the UI for this tag, as otherwise CircleCI will not be able to upload tarballs for this tag. Go to the releases page of the project, click on the `Draft a new release` button and select the tag you just pushed. Describe release and post relevant entry from changelog. Click `Save draft` **rather** than `Publish release` at this time. (This will prevent the release being visible before it has got the binaries attached to it.) 
*In case you did not manage to create the draft release before CircleCI run is finished (it will fail on the artifacts upload step in this case), you can re-trigger the run manually from the CircleCI dashboard *after* you created the draft release.* -7. You must create a Github Release using the UI for this tag, as otherwise CircleCI will not be able to upload tarballs for this tag. Also, you must create the Github Release using a Github user that has granted access rights to CircleCI. List of maintainers is available [here](../MAINTAINERS.md) +3. You are also encouraged to include a list of (first time) contributors to the release. You can do this by clicking on `Auto-generate release notes`, which will generate this section for you (edit the notes as required to remove unnecessary parts). -8. Go to the releases page of the project, click on the `Draft a new release` button and select the tag you just pushed. Describe release and post relevant entry from changelog. Click `Save draft` rather than `Publish release` at this time. (This will prevent the release being visible before it has got the binaries attached to it.) +4. Once tarballs are published on release page, you can click `Publish` and release is complete. -9. Once tarballs are published on release page, you can click `Publish` and release is complete. +### Completing the release -10. Announce `#thanos` slack channel. +1. Announce the release on the `#thanos` Slack channel. You are also encouraged to announce the new release on any Thanos social media accounts, such as Twitter (the credentials are available via Thanos' [Keybase](https://keybase.io/) team which includes all maintainers). -11. Pull commits from release branch to main branch for non `rc` releases. Make sure to not modify `VERSION`, it should be still pointing to `version+1-dev` ([TODO to automate this](https://github.com/thanos-io/thanos/issues/4741)) +2. Pull commits from release branch to main branch for non `rc` releases. 
Make sure to not modify `VERSION`, it should be still pointing to `version+1-dev` ([TODO to automate this](https://github.com/thanos-io/thanos/issues/4741)) -12. After releasing a major version, please cut a release for `kube-thanos` as well. https://github.com/thanos-io/kube-thanos/releases Make sure all the flag changes are reflected in the manifests. Otherwise, the process is the same, except we don't have `rc` for the `kube-thanos`. We do this to make sure we have compatible manifests for each major versions. +3. After releasing a major version, please cut a release for `kube-thanos` as well. https://github.com/thanos-io/kube-thanos/releases Make sure all the flag changes are reflected in the manifests. Otherwise, the process is the same, except we don't have `rc` for the `kube-thanos`. We do this to make sure we have compatible manifests for each major versions. -13. Merge `release-.` branch back to main. This is important for Go modules tooling to make release tags reachable from main branch. +4. Merge `release-.` branch back to main. This is important for Go modules tooling to make release tags reachable from main branch. - - Create `merge-release-.-to-main` branch **from `release-.` branch** locally - - Merge upstream `main` branch into your `merge-release-.-to-main` and resolve conflicts - - Send PR for merging your `merge-release-.-to-main` branch into `main` - - Once approved, merge the PR by using "Merge" commit. - - This can either be done by temporarily enabling "Allow merge commits" option in "Settings > Options". - - Alternatively, this can be done locally by merging `merge-release-.-to-main` branch into `main`, and pushing resulting `main` to upstream repository. This doesn't break `main` branch protection, since PR has been approved already, and it also doesn't require removing the protection. 
+ - Create `merge-release-.-to-main` branch **from `release-.` branch** locally + - Merge upstream `main` branch into your `merge-release-.-to-main` and resolve conflicts + - Open a PR for merging your `merge-release-.-to-main` branch against `main` + - Once approved, merge the PR **by using "Merge" commit**. + - This can either be done by temporarily enabling "Allow merge commits" option in "Settings > Options". + - Alternatively, this can be done locally by merging `merge-release-.-to-main` branch into `main`, and pushing resulting `main` to upstream repository. This doesn't break `main` branch protection, since PR has been approved already, and it also doesn't require removing the protection. ## Pre-releases (release candidates) From afe25faae177395b90f353d71ef96797250f74d4 Mon Sep 17 00:00:00 2001 From: Matej Gera <38492574+matej-g@users.noreply.github.com> Date: Wed, 2 Mar 2022 11:36:26 +0100 Subject: [PATCH 03/24] Bump E2E version (#5187) Signed-off-by: Matej Gera --- go.mod | 2 +- go.sum | 4 +- test/e2e/e2ethanos/service.go | 10 +-- test/e2e/e2ethanos/services.go | 115 ++++++++++++++++++--------------- test/e2e/exemplars_api_test.go | 10 +-- test/e2e/query_test.go | 18 +++--- test/e2e/receive_test.go | 56 ++++++++-------- test/e2e/rule_test.go | 2 +- test/e2e/rules_api_test.go | 2 +- test/e2e/store_gateway_test.go | 4 +- 10 files changed, 117 insertions(+), 106 deletions(-) diff --git a/go.mod b/go.mod index a5edb003cb3..d1c2b2400ba 100644 --- a/go.mod +++ b/go.mod @@ -22,7 +22,7 @@ require ( github.com/chromedp/chromedp v0.5.3 github.com/cortexproject/cortex v1.10.1-0.20211124141505-4e9fc3a2b5ab github.com/davecgh/go-spew v1.1.1 - github.com/efficientgo/e2e v0.11.2-0.20211027134903-67d538984a47 + github.com/efficientgo/e2e v0.11.2-0.20220224081107-b67f7b039363 github.com/efficientgo/tools/core v0.0.0-20210829154005-c7bad8450208 github.com/efficientgo/tools/extkingpin v0.0.0-20210609125236-d73259166f20 github.com/facette/natsort 
v0.0.0-20181210072756-2cd4dd1e2dcb diff --git a/go.sum b/go.sum index afe1d12b734..a27aef6d506 100644 --- a/go.sum +++ b/go.sum @@ -549,8 +549,8 @@ github.com/eclipse/paho.mqtt.golang v1.2.0/go.mod h1:H9keYFcgq3Qr5OUJm/JZI/i6U7j github.com/edsrzf/mmap-go v0.0.0-20170320065105-0bce6a688712/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw= github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= -github.com/efficientgo/e2e v0.11.2-0.20211027134903-67d538984a47 h1:k0qDUhOU0KJqKztQYJL1qMBR9nCOntuIRWYwA56Z634= -github.com/efficientgo/e2e v0.11.2-0.20211027134903-67d538984a47/go.mod h1:vDnF4AAEZmO0mvyFIATeDJPFaSRM7ywaOnKd61zaSoE= +github.com/efficientgo/e2e v0.11.2-0.20220224081107-b67f7b039363 h1:wlimY9L7RuHjNugLWsYH1wFTpkonEc3rktYSLx8aXu0= +github.com/efficientgo/e2e v0.11.2-0.20220224081107-b67f7b039363/go.mod h1:vDnF4AAEZmO0mvyFIATeDJPFaSRM7ywaOnKd61zaSoE= github.com/efficientgo/tools/core v0.0.0-20210731122119-5d4a0645ce9a h1:Az9zRvQubUIHE+tHAm0gG7Dwge08V8Q/9uNSIFjFm+A= github.com/efficientgo/tools/core v0.0.0-20210731122119-5d4a0645ce9a/go.mod h1:OmVcnJopJL8d3X3sSXTiypGoUSgFq1aDGmlrdi9dn/M= github.com/efficientgo/tools/extkingpin v0.0.0-20210609125236-d73259166f20 h1:kM/ALyvAnTrwSB+nlKqoKaDnZbInp1YImZvW+gtHwc8= diff --git a/test/e2e/e2ethanos/service.go b/test/e2e/e2ethanos/service.go index 2a8d0bd31f8..0db089391e6 100644 --- a/test/e2e/e2ethanos/service.go +++ b/test/e2e/e2ethanos/service.go @@ -24,7 +24,7 @@ func NewService( readiness *e2e.HTTPReadinessProbe, http, grpc int, otherPorts ...Port, -) *e2e.InstrumentedRunnable { +) e2e.InstrumentedRunnable { return newUninitiatedService(e, name, http, grpc, otherPorts...).Init( e2e.StartOptions{ Image: image, @@ -41,7 +41,7 @@ func newUninitiatedService( name string, http, grpc int, otherPorts ...Port, -) *e2e.FutureInstrumentedRunnable { +) e2e.InstrumentedRunnableBuilder { metricsPorts := "http" 
ports := map[string]int{ "http": http, @@ -56,15 +56,15 @@ func newUninitiatedService( } } - return e2e.NewInstrumentedRunnable(e, name, ports, metricsPorts) + return e2e.NewInstrumentedRunnable(e, name).WithPorts(ports, metricsPorts) } func initiateService( - service *e2e.FutureInstrumentedRunnable, + service e2e.InstrumentedRunnableBuilder, image string, command e2e.Command, readiness *e2e.HTTPReadinessProbe, -) *e2e.InstrumentedRunnable { +) e2e.InstrumentedRunnable { return service.Init( e2e.StartOptions{ Image: image, diff --git a/test/e2e/e2ethanos/services.go b/test/e2e/e2ethanos/services.go index 88a885523e2..75c5bb16cd2 100644 --- a/test/e2e/e2ethanos/services.go +++ b/test/e2e/e2ethanos/services.go @@ -81,7 +81,7 @@ func defaultPromHttpConfig() string { ` } -func NewPrometheus(e e2e.Environment, name, promConfig, webConfig, promImage string, enableFeatures ...string) (*e2e.InstrumentedRunnable, string, error) { +func NewPrometheus(e e2e.Environment, name, promConfig, webConfig, promImage string, enableFeatures ...string) (e2e.InstrumentedRunnable, string, error) { dir := filepath.Join(e.SharedDir(), "data", "prometheus", name) container := filepath.Join(ContainerSharedDir, "data", "prometheus", name) if err := os.MkdirAll(dir, 0750); err != nil { @@ -119,8 +119,10 @@ func NewPrometheus(e e2e.Environment, name, promConfig, webConfig, promImage str prom := e2e.NewInstrumentedRunnable( e, fmt.Sprintf("prometheus-%s", name), + ).WithPorts( map[string]int{"http": 9090}, - "http").Init( + "http", + ).Init( e2e.StartOptions{ Image: promImage, Command: e2e.NewCommandWithoutEntrypoint("prometheus", args...), @@ -133,11 +135,11 @@ func NewPrometheus(e e2e.Environment, name, promConfig, webConfig, promImage str return prom, container, nil } -func NewPrometheusWithSidecar(e e2e.Environment, name, promConfig, webConfig, promImage, minTime string, enableFeatures ...string) (*e2e.InstrumentedRunnable, *e2e.InstrumentedRunnable, error) { +func NewPrometheusWithSidecar(e 
e2e.Environment, name, promConfig, webConfig, promImage, minTime string, enableFeatures ...string) (e2e.InstrumentedRunnable, e2e.InstrumentedRunnable, error) { return NewPrometheusWithSidecarCustomImage(e, name, promConfig, webConfig, promImage, minTime, DefaultImage(), enableFeatures...) } -func NewPrometheusWithSidecarCustomImage(e e2e.Environment, name, promConfig, webConfig, promImage, minTime string, sidecarImage string, enableFeatures ...string) (*e2e.InstrumentedRunnable, *e2e.InstrumentedRunnable, error) { +func NewPrometheusWithSidecarCustomImage(e e2e.Environment, name, promConfig, webConfig, promImage, minTime string, sidecarImage string, enableFeatures ...string) (e2e.InstrumentedRunnable, e2e.InstrumentedRunnable, error) { prom, dataDir, err := NewPrometheus(e, name, promConfig, webConfig, promImage, enableFeatures...) if err != nil { return nil, nil, err @@ -150,7 +152,7 @@ func NewPrometheusWithSidecarCustomImage(e e2e.Environment, name, promConfig, we "--http-address": ":8080", "--prometheus.url": "http://" + prom.InternalEndpoint("http"), "--tsdb.path": dataDir, - "--log.level": infoLogLevel, + "--log.level": "debug", } if len(webConfig) > 0 { args["--prometheus.http-client"] = defaultPromHttpConfig() @@ -256,7 +258,7 @@ func (q *QuerierBuilder) WithTracingConfig(tracingConfig string) *QuerierBuilder return q } -func (q *QuerierBuilder) BuildUninitiated() *e2e.FutureInstrumentedRunnable { +func (q *QuerierBuilder) BuildUninitiated() e2e.InstrumentedRunnableBuilder { return newUninitiatedService( q.environment, fmt.Sprintf("querier-%v", q.name), @@ -265,7 +267,7 @@ func (q *QuerierBuilder) BuildUninitiated() *e2e.FutureInstrumentedRunnable { ) } -func (q *QuerierBuilder) Initiate(service *e2e.FutureInstrumentedRunnable, storeAddresses ...string) (*e2e.InstrumentedRunnable, error) { +func (q *QuerierBuilder) Initiate(service e2e.InstrumentedRunnableBuilder, storeAddresses ...string) (e2e.InstrumentedRunnable, error) { q.storeAddresses = 
storeAddresses args, err := q.collectArgs() if err != nil { @@ -282,7 +284,7 @@ func (q *QuerierBuilder) Initiate(service *e2e.FutureInstrumentedRunnable, store return querier, nil } -func (q *QuerierBuilder) Build() (*e2e.InstrumentedRunnable, error) { +func (q *QuerierBuilder) Build() (e2e.InstrumentedRunnable, error) { args, err := q.collectArgs() if err != nil { return nil, err @@ -386,24 +388,24 @@ func RemoteWriteEndpoint(addr string) string { return fmt.Sprintf("http://%s/api // NewUninitiatedReceiver returns a future receiver that can be initiated. It is useful // for obtaining a receiver address for hashring before the receiver is started. -func NewUninitiatedReceiver(e e2e.Environment, name string) *e2e.FutureInstrumentedRunnable { +func NewUninitiatedReceiver(e e2e.Environment, name string) e2e.InstrumentedRunnableBuilder { return newUninitiatedService(e, fmt.Sprintf("receive-%v", name), 8080, 9091, Port{Name: "remote-write", PortNum: 8081}) } // NewRoutingAndIngestingReceiverFromService creates a Thanos Receive instances from an unitiated service. // It is configured both for ingesting samples and routing samples to other receivers. 
-func NewRoutingAndIngestingReceiverFromService(service *e2e.FutureInstrumentedRunnable, sharedDir string, replicationFactor int, hashring ...receive.HashringConfig) (*e2e.InstrumentedRunnable, error) { +func NewRoutingAndIngestingReceiverFromService(service e2e.InstrumentedRunnableBuilder, sharedDir string, replicationFactor int, hashring ...receive.HashringConfig) (e2e.InstrumentedRunnable, error) { var localEndpoint string if len(hashring) == 0 { localEndpoint = "0.0.0.0:9091" hashring = []receive.HashringConfig{{Endpoints: []string{localEndpoint}}} } else { - localEndpoint = service.InternalEndpoint("grpc") + localEndpoint = service.Future().InternalEndpoint("grpc") } - dir := filepath.Join(sharedDir, "data", "receive", service.Name()) + dir := filepath.Join(sharedDir, "data", "receive", service.Future().Name()) dataDir := filepath.Join(dir, "data") - container := filepath.Join(ContainerSharedDir, "data", "receive", service.Name()) + container := filepath.Join(ContainerSharedDir, "data", "receive", service.Future().Name()) if err := os.MkdirAll(dataDir, 0750); err != nil { return nil, errors.Wrap(err, "create receive dir") } @@ -417,12 +419,12 @@ func NewRoutingAndIngestingReceiverFromService(service *e2e.FutureInstrumentedRu DefaultImage(), // TODO(bwplotka): BuildArgs should be interface. 
e2e.NewCommand("receive", e2e.BuildArgs(map[string]string{ - "--debug.name": service.Name(), + "--debug.name": service.Future().Name(), "--grpc-address": ":9091", "--grpc-grace-period": "0s", "--http-address": ":8080", "--remote-write.address": ":8081", - "--label": fmt.Sprintf(`receive="%s"`, service.Name()), + "--label": fmt.Sprintf(`receive="%s"`, service.Future().Name()), "--tsdb.path": filepath.Join(container, "data"), "--log.level": infoLogLevel, "--receive.replication-factor": strconv.Itoa(replicationFactor), @@ -435,18 +437,18 @@ func NewRoutingAndIngestingReceiverFromService(service *e2e.FutureInstrumentedRu return receiver, nil } -func NewRoutingAndIngestingReceiverWithConfigWatcher(service *e2e.FutureInstrumentedRunnable, sharedDir string, replicationFactor int, hashring ...receive.HashringConfig) (*e2e.InstrumentedRunnable, error) { +func NewRoutingAndIngestingReceiverWithConfigWatcher(service e2e.InstrumentedRunnableBuilder, sharedDir string, replicationFactor int, hashring ...receive.HashringConfig) (e2e.InstrumentedRunnable, error) { var localEndpoint string if len(hashring) == 0 { localEndpoint = "0.0.0.0:9091" hashring = []receive.HashringConfig{{Endpoints: []string{localEndpoint}}} } else { - localEndpoint = service.InternalEndpoint("grpc") + localEndpoint = service.Future().InternalEndpoint("grpc") } - dir := filepath.Join(sharedDir, "data", "receive", service.Name()) + dir := filepath.Join(sharedDir, "data", "receive", service.Future().Name()) dataDir := filepath.Join(dir, "data") - container := filepath.Join(ContainerSharedDir, "data", "receive", service.Name()) + container := filepath.Join(ContainerSharedDir, "data", "receive", service.Future().Name()) if err := os.MkdirAll(dataDir, 0750); err != nil { return nil, errors.Wrap(err, "create receive dir") } @@ -464,12 +466,12 @@ func NewRoutingAndIngestingReceiverWithConfigWatcher(service *e2e.FutureInstrume DefaultImage(), // TODO(bwplotka): BuildArgs should be interface. 
e2e.NewCommand("receive", e2e.BuildArgs(map[string]string{ - "--debug.name": service.Name(), + "--debug.name": service.Future().Name(), "--grpc-address": ":9091", "--grpc-grace-period": "0s", "--http-address": ":8080", "--remote-write.address": ":8081", - "--label": fmt.Sprintf(`receive="%s"`, service.Name()), + "--label": fmt.Sprintf(`receive="%s"`, service.Future().Name()), "--tsdb.path": filepath.Join(container, "data"), "--log.level": infoLogLevel, "--receive.replication-factor": strconv.Itoa(replicationFactor), @@ -484,7 +486,7 @@ func NewRoutingAndIngestingReceiverWithConfigWatcher(service *e2e.FutureInstrume } // NewRoutingReceiver creates a Thanos Receive instance that is only configured to route to other receive instances. It has no local storage. -func NewRoutingReceiver(e e2e.Environment, name string, replicationFactor int, hashring ...receive.HashringConfig) (*e2e.InstrumentedRunnable, error) { +func NewRoutingReceiver(e e2e.Environment, name string, replicationFactor int, hashring ...receive.HashringConfig) (e2e.InstrumentedRunnable, error) { if len(hashring) == 0 { return nil, errors.New("hashring should not be empty for receive-distributor mode") @@ -528,7 +530,7 @@ func NewRoutingReceiver(e e2e.Environment, name string, replicationFactor int, h } // NewIngestingReceiver creates a Thanos Receive instance that is only configured to ingest, not route to other receivers. 
-func NewIngestingReceiver(e e2e.Environment, name string) (*e2e.InstrumentedRunnable, error) { +func NewIngestingReceiver(e e2e.Environment, name string) (e2e.InstrumentedRunnable, error) { dir := filepath.Join(e.SharedDir(), "data", "receive", name) dataDir := filepath.Join(dir, "data") container := filepath.Join(ContainerSharedDir, "data", "receive", name) @@ -558,15 +560,15 @@ func NewIngestingReceiver(e e2e.Environment, name string) (*e2e.InstrumentedRunn return receiver, nil } -func NewTSDBRuler(e e2e.Environment, name, ruleSubDir string, amCfg []alert.AlertmanagerConfig, queryCfg []httpconfig.Config) (*e2e.InstrumentedRunnable, error) { +func NewTSDBRuler(e e2e.Environment, name, ruleSubDir string, amCfg []alert.AlertmanagerConfig, queryCfg []httpconfig.Config) (e2e.InstrumentedRunnable, error) { return newRuler(e, name, ruleSubDir, amCfg, queryCfg, nil) } -func NewStatelessRuler(e e2e.Environment, name, ruleSubDir string, amCfg []alert.AlertmanagerConfig, queryCfg []httpconfig.Config, remoteWriteCfg []*config.RemoteWriteConfig) (*e2e.InstrumentedRunnable, error) { +func NewStatelessRuler(e e2e.Environment, name, ruleSubDir string, amCfg []alert.AlertmanagerConfig, queryCfg []httpconfig.Config, remoteWriteCfg []*config.RemoteWriteConfig) (e2e.InstrumentedRunnable, error) { return newRuler(e, name, ruleSubDir, amCfg, queryCfg, remoteWriteCfg) } -func newRuler(e e2e.Environment, name, ruleSubDir string, amCfg []alert.AlertmanagerConfig, queryCfg []httpconfig.Config, remoteWriteCfg []*config.RemoteWriteConfig) (*e2e.InstrumentedRunnable, error) { +func newRuler(e e2e.Environment, name, ruleSubDir string, amCfg []alert.AlertmanagerConfig, queryCfg []httpconfig.Config, remoteWriteCfg []*config.RemoteWriteConfig) (e2e.InstrumentedRunnable, error) { dir := filepath.Join(e.SharedDir(), "data", "rule", name) container := filepath.Join(ContainerSharedDir, "data", "rule", name) @@ -624,7 +626,7 @@ func newRuler(e e2e.Environment, name, ruleSubDir string, amCfg 
[]alert.Alertman return ruler, nil } -func NewAlertmanager(e e2e.Environment, name string) (*e2e.InstrumentedRunnable, error) { +func NewAlertmanager(e e2e.Environment, name string) (e2e.InstrumentedRunnable, error) { dir := filepath.Join(e.SharedDir(), "data", "am", name) container := filepath.Join(ContainerSharedDir, "data", "am", name) if err := os.MkdirAll(dir, 0750); err != nil { @@ -644,7 +646,11 @@ receivers: } s := e2e.NewInstrumentedRunnable( - e, fmt.Sprintf("alertmanager-%v", name), map[string]int{"http": 8080}, "http").Init( + e, fmt.Sprintf("alertmanager-%v", name), + ).WithPorts( + map[string]int{"http": 8080}, + "http", + ).Init( e2e.StartOptions{ Image: DefaultAlertmanagerImage(), Command: e2e.NewCommandWithoutEntrypoint("/bin/alertmanager", e2e.BuildArgs(map[string]string{ @@ -664,7 +670,7 @@ receivers: return s, nil } -func NewStoreGW(e e2e.Environment, name string, bucketConfig client.BucketConfig, cacheConfig string, extArgs []string, relabelConfig ...relabel.Config) (*e2e.InstrumentedRunnable, error) { +func NewStoreGW(e e2e.Environment, name string, bucketConfig client.BucketConfig, cacheConfig string, extArgs []string, relabelConfig ...relabel.Config) (e2e.InstrumentedRunnable, error) { dir := filepath.Join(e.SharedDir(), "data", "store", name) container := filepath.Join(ContainerSharedDir, "data", "store", name) if err := os.MkdirAll(dir, 0750); err != nil { @@ -714,7 +720,7 @@ func NewStoreGW(e e2e.Environment, name string, bucketConfig client.BucketConfig return store, nil } -func NewCompactor(e e2e.Environment, name string, bucketConfig client.BucketConfig, relabelConfig []relabel.Config, extArgs ...string) (*e2e.InstrumentedRunnable, error) { +func NewCompactor(e e2e.Environment, name string, bucketConfig client.BucketConfig, relabelConfig []relabel.Config, extArgs ...string) (e2e.InstrumentedRunnable, error) { dir := filepath.Join(e.SharedDir(), "data", "compact", name) container := filepath.Join(ContainerSharedDir, "data", "compact", 
name) @@ -733,18 +739,22 @@ func NewCompactor(e e2e.Environment, name string, bucketConfig client.BucketConf } compactor := e2e.NewInstrumentedRunnable( - e, fmt.Sprintf("compact-%s", name), map[string]int{"http": 8080}, "http").Init( + e, fmt.Sprintf("compact-%s", name), + ).WithPorts( + map[string]int{"http": 8080}, "http", + ).Init( e2e.StartOptions{ Image: DefaultImage(), Command: e2e.NewCommand("compact", append(e2e.BuildArgs(map[string]string{ - "--debug.name": fmt.Sprintf("compact-%s", name), - "--log.level": infoLogLevel, - "--data-dir": container, - "--objstore.config": string(bktConfigBytes), - "--http-address": ":8080", - "--block-sync-concurrency": "20", - "--selector.relabel-config": string(relabelConfigBytes), - "--wait": "", + "--debug.name": fmt.Sprintf("compact-%s", name), + "--log.level": infoLogLevel, + "--data-dir": container, + "--objstore.config": string(bktConfigBytes), + "--http-address": ":8080", + "--block-sync-concurrency": "50", + "--compact.cleanup-interval": "15s", + "--selector.relabel-config": string(relabelConfigBytes), + "--wait": "", }), extArgs...)...), Readiness: e2e.NewHTTPReadinessProbe("http", "/-/ready", 200, 200), User: strconv.Itoa(os.Getuid()), @@ -755,7 +765,7 @@ func NewCompactor(e e2e.Environment, name string, bucketConfig client.BucketConf return compactor, nil } -func NewQueryFrontend(e e2e.Environment, name, downstreamURL string, cacheConfig queryfrontend.CacheProviderConfig) (*e2e.InstrumentedRunnable, error) { +func NewQueryFrontend(e e2e.Environment, name, downstreamURL string, cacheConfig queryfrontend.CacheProviderConfig) (e2e.InstrumentedRunnable, error) { cacheConfigBytes, err := yaml.Marshal(cacheConfig) if err != nil { return nil, errors.Wrapf(err, "marshal response cache config file: %v", cacheConfig) @@ -770,7 +780,8 @@ func NewQueryFrontend(e e2e.Environment, name, downstreamURL string, cacheConfig }) queryFrontend := e2e.NewInstrumentedRunnable( - e, fmt.Sprintf("query-frontend-%s", name), 
map[string]int{"http": 8080}, "http").Init( + e, fmt.Sprintf("query-frontend-%s", name), + ).WithPorts(map[string]int{"http": 8080}, "http").Init( e2e.StartOptions{ Image: DefaultImage(), Command: e2e.NewCommand("query-frontend", args...), @@ -783,7 +794,7 @@ func NewQueryFrontend(e e2e.Environment, name, downstreamURL string, cacheConfig return queryFrontend, nil } -func NewReverseProxy(e e2e.Environment, name, tenantID, target string) (*e2e.InstrumentedRunnable, error) { +func NewReverseProxy(e e2e.Environment, name, tenantID, target string) (e2e.InstrumentedRunnable, error) { conf := fmt.Sprintf(` events { worker_connections 1024; @@ -811,7 +822,7 @@ http { return nil, errors.Wrap(err, "creating nginx config file failed") } - nginx := e2e.NewInstrumentedRunnable(e, fmt.Sprintf("nginx-%s", name), map[string]int{"http": 80}, "http").Init( + nginx := e2e.NewInstrumentedRunnable(e, fmt.Sprintf("nginx-%s", name)).WithPorts(map[string]int{"http": 80}, "http").Init( e2e.StartOptions{ Image: "docker.io/nginx:1.21.1-alpine", Volumes: []string{filepath.Join(dir, "/nginx.conf") + ":/etc/nginx/nginx.conf:ro"}, @@ -825,7 +836,7 @@ http { // NewMinio returns minio server, used as a local replacement for S3. // TODO(@matej-g): This is a temporary workaround for https://github.com/efficientgo/e2e/issues/11; // after this is addresses fixed all calls should be replaced with e2edb.NewMinio. 
-func NewMinio(env e2e.Environment, name, bktName string) (*e2e.InstrumentedRunnable, error) { +func NewMinio(env e2e.Environment, name, bktName string) (e2e.InstrumentedRunnable, error) { image := "minio/minio:RELEASE.2019-12-30T05-45-39Z" minioKESGithubContent := "https://raw.githubusercontent.com/minio/kes/master" commands := []string{ @@ -848,15 +859,15 @@ func NewMinio(env e2e.Environment, name, bktName string) (*e2e.InstrumentedRunna return e2e.NewInstrumentedRunnable( env, name, + ).WithPorts( map[string]int{"https": 8090}, - "https").Init( + "https", + ).Init( e2e.StartOptions{ Image: image, // Create the required bucket before starting minio. - Command: e2e.NewCommandWithoutEntrypoint("sh", "-c", fmt.Sprintf(strings.Join(commands, " && "), minioKESGithubContent, minioKESGithubContent, bktName, 8090)), - //TODO(@clyang82): This is a temporary workaround for https://github.com/efficientgo/e2e/issues/9 - //Readiness: e2e.NewHTTPReadinessProbe("http", "/minio/health/ready", 200, 200), - Readiness: e2e.NewCmdReadinessProbe(e2e.NewCommand("sh", "-c", "sleep 1 && curl -k https://127.0.0.1:8090/minio/health/ready")), + Command: e2e.NewCommandWithoutEntrypoint("sh", "-c", fmt.Sprintf(strings.Join(commands, " && "), minioKESGithubContent, minioKESGithubContent, bktName, 8090)), + Readiness: e2e.NewHTTPSReadinessProbe("https", "/minio/health/ready", 200, 200), EnvVars: map[string]string{ "MINIO_ACCESS_KEY": e2edb.MinioAccessKey, "MINIO_SECRET_KEY": e2edb.MinioSecretKey, @@ -872,8 +883,8 @@ func NewMinio(env e2e.Environment, name, bktName string) (*e2e.InstrumentedRunna ), nil } -func NewMemcached(e e2e.Environment, name string) *e2e.InstrumentedRunnable { - memcached := e2e.NewInstrumentedRunnable(e, fmt.Sprintf("memcached-%s", name), map[string]int{"memcached": 11211}, "memcached").Init( +func NewMemcached(e e2e.Environment, name string) e2e.InstrumentedRunnable { + memcached := e2e.NewInstrumentedRunnable(e, fmt.Sprintf("memcached-%s", 
name)).WithPorts(map[string]int{"memcached": 11211}, "memcached").Init( e2e.StartOptions{ Image: "docker.io/memcached:1.6.3-alpine", Command: e2e.NewCommand("memcached", []string{"-m 1024", "-I 1m", "-c 1024", "-v"}...), @@ -894,7 +905,7 @@ func NewToolsBucketWeb( minTime string, maxTime string, relabelConfig string, -) (*e2e.InstrumentedRunnable, error) { +) (e2e.InstrumentedRunnable, error) { bktConfigBytes, err := yaml.Marshal(bucketConfig) if err != nil { return nil, errors.Wrapf(err, "generate tools bucket web config file: %v", bucketConfig) diff --git a/test/e2e/exemplars_api_test.go b/test/e2e/exemplars_api_test.go index fdd807fb1d8..5bc2d1532de 100644 --- a/test/e2e/exemplars_api_test.go +++ b/test/e2e/exemplars_api_test.go @@ -26,8 +26,8 @@ const ( func TestExemplarsAPI_Fanout(t *testing.T) { t.Parallel() var ( - prom1, prom2 *e2e.InstrumentedRunnable - sidecar1, sidecar2 *e2e.InstrumentedRunnable + prom1, prom2 e2e.InstrumentedRunnable + sidecar1, sidecar2 e2e.InstrumentedRunnable err error e *e2e.DockerEnvironment ) @@ -42,7 +42,7 @@ func TestExemplarsAPI_Fanout(t *testing.T) { prom1, sidecar1, err = e2ethanos.NewPrometheusWithSidecar( e, "prom1", - defaultPromConfig("ha", 0, "", "", "localhost:9090", qUnitiated.InternalEndpoint("http")), + defaultPromConfig("ha", 0, "", "", "localhost:9090", qUnitiated.Future().InternalEndpoint("http")), "", e2ethanos.DefaultPrometheusImage(), "", @@ -52,7 +52,7 @@ func TestExemplarsAPI_Fanout(t *testing.T) { prom2, sidecar2, err = e2ethanos.NewPrometheusWithSidecar( e, "prom2", - defaultPromConfig("ha", 1, "", "", "localhost:9090", qUnitiated.InternalEndpoint("http")), + defaultPromConfig("ha", 1, "", "", "localhost:9090", qUnitiated.Future().InternalEndpoint("http")), "", e2ethanos.DefaultPrometheusImage(), "", @@ -64,7 +64,7 @@ func TestExemplarsAPI_Fanout(t *testing.T) { config: sampler_type: const sampler_param: 1 - service_name: %s`, qUnitiated.Name()) + service_name: %s`, qUnitiated.Future().Name()) stores := 
[]string{sidecar1.InternalEndpoint("grpc"), sidecar2.InternalEndpoint("grpc")} diff --git a/test/e2e/query_test.go b/test/e2e/query_test.go index 481ae091f71..089750dbed7 100644 --- a/test/e2e/query_test.go +++ b/test/e2e/query_test.go @@ -165,7 +165,7 @@ func TestQuery(t *testing.T) { prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "alone", defaultPromConfig("prom-alone", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) - prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar(e, "remote-and-sidecar", defaultPromConfig("prom-both-remote-write-and-sidecar", 1234, e2ethanos.RemoteWriteEndpoint(receiver.InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage(), "") + prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar(e, "remote-and-sidecar", defaultPromConfig("prom-both-remote-write-and-sidecar", 1234, e2ethanos.RemoteWriteEndpoint(receiver.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) prom3, sidecar3, err := e2ethanos.NewPrometheusWithSidecar(e, "ha1", defaultPromConfig("prom-ha", 0, "", filepath.Join(e2ethanos.ContainerSharedDir, "", "*.yaml")), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) @@ -174,7 +174,7 @@ func TestQuery(t *testing.T) { testutil.Ok(t, e2e.StartAndWaitReady(prom1, sidecar1, prom2, sidecar2, prom3, sidecar3, prom4, sidecar4)) // Querier. Both fileSD and directly by flags. - q, err := e2ethanos.NewQuerierBuilder(e, "1", sidecar1.InternalEndpoint("grpc"), sidecar2.InternalEndpoint("grpc"), receiver.InternalEndpoint("grpc")). + q, err := e2ethanos.NewQuerierBuilder(e, "1", sidecar1.InternalEndpoint("grpc"), sidecar2.InternalEndpoint("grpc"), receiver.Future().InternalEndpoint("grpc")). 
WithFileSDStoreAddresses(sidecar3.InternalEndpoint("grpc"), sidecar4.InternalEndpoint("grpc")).Build() testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(q)) @@ -319,11 +319,11 @@ func TestQueryLabelNames(t *testing.T) { prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "alone", defaultPromConfig("prom-alone", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) - prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar(e, "remote-and-sidecar", defaultPromConfig("prom-both-remote-write-and-sidecar", 1234, e2ethanos.RemoteWriteEndpoint(receiver.InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage(), "") + prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar(e, "remote-and-sidecar", defaultPromConfig("prom-both-remote-write-and-sidecar", 1234, e2ethanos.RemoteWriteEndpoint(receiver.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1, sidecar1, prom2, sidecar2)) - q, err := e2ethanos.NewQuerierBuilder(e, "1", sidecar1.InternalEndpoint("grpc"), sidecar2.InternalEndpoint("grpc"), receiver.InternalEndpoint("grpc")).Build() + q, err := e2ethanos.NewQuerierBuilder(e, "1", sidecar1.InternalEndpoint("grpc"), sidecar2.InternalEndpoint("grpc"), receiver.Future().InternalEndpoint("grpc")).Build() testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(q)) @@ -371,11 +371,11 @@ func TestQueryLabelValues(t *testing.T) { prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "alone", defaultPromConfig("prom-alone", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) - prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar(e, "remote-and-sidecar", defaultPromConfig("prom-both-remote-write-and-sidecar", 1234, e2ethanos.RemoteWriteEndpoint(receiver.InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage(), "") + prom2, sidecar2, err := 
e2ethanos.NewPrometheusWithSidecar(e, "remote-and-sidecar", defaultPromConfig("prom-both-remote-write-and-sidecar", 1234, e2ethanos.RemoteWriteEndpoint(receiver.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1, sidecar1, prom2, sidecar2)) - q, err := e2ethanos.NewQuerierBuilder(e, "1", sidecar1.InternalEndpoint("grpc"), sidecar2.InternalEndpoint("grpc"), receiver.InternalEndpoint("grpc")).Build() + q, err := e2ethanos.NewQuerierBuilder(e, "1", sidecar1.InternalEndpoint("grpc"), sidecar2.InternalEndpoint("grpc"), receiver.Future().InternalEndpoint("grpc")).Build() testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(q)) @@ -470,7 +470,7 @@ func TestQueryCompatibilityWithPreInfoAPI(t *testing.T) { p1, s1, err := e2ethanos.NewPrometheusWithSidecarCustomImage( e, "p1", - defaultPromConfig("p1", 0, "", filepath.Join(e2ethanos.ContainerSharedDir, promRulesSubDir, "*.yaml"), "localhost:9090", qUninit.InternalEndpoint("http")), + defaultPromConfig("p1", 0, "", filepath.Join(e2ethanos.ContainerSharedDir, promRulesSubDir, "*.yaml"), "localhost:9090", qUninit.Future().InternalEndpoint("http")), "", e2ethanos.DefaultPrometheusImage(), "", @@ -490,7 +490,7 @@ func TestQueryCompatibilityWithPreInfoAPI(t *testing.T) { config: sampler_type: const sampler_param: 1 - service_name: %s`, qUninit.Name())). // Use fake tracing config to trigger exemplar. + service_name: %s`, qUninit.Future().Name())). // Use fake tracing config to trigger exemplar. WithImage(tcase.queryImage). 
Initiate(qUninit, s1.InternalEndpoint("grpc")) testutil.Ok(t, err) @@ -1088,7 +1088,7 @@ func queryExemplars(t *testing.T, ctx context.Context, addr, q string, start, en })) } -func synthesizeSamples(ctx context.Context, prometheus *e2e.InstrumentedRunnable, testSamples []fakeMetricSample) error { +func synthesizeSamples(ctx context.Context, prometheus e2e.InstrumentedRunnable, testSamples []fakeMetricSample) error { samples := make([]model.Sample, len(testSamples)) for i, s := range testSamples { samples[i] = newSample(s) diff --git a/test/e2e/receive_test.go b/test/e2e/receive_test.go index 12f69c849b2..fc44ac10d04 100644 --- a/test/e2e/receive_test.go +++ b/test/e2e/receive_test.go @@ -346,9 +346,9 @@ func TestReceive(t *testing.T) { h := receive.HashringConfig{ Endpoints: []string{ - r1.InternalEndpoint("grpc"), - r2.InternalEndpoint("grpc"), - r3.InternalEndpoint("grpc"), + r1.Future().InternalEndpoint("grpc"), + r2.Future().InternalEndpoint("grpc"), + r3.Future().InternalEndpoint("grpc"), }, } @@ -361,15 +361,15 @@ func TestReceive(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(r1Runnable, r2Runnable, r3Runnable)) - prom1, _, err := e2ethanos.NewPrometheus(e, "1", defaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom1, _, err := e2ethanos.NewPrometheus(e, "1", defaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) - prom2, _, err := e2ethanos.NewPrometheus(e, "2", defaultPromConfig("prom2", 0, e2ethanos.RemoteWriteEndpoint(r2.InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom2, _, err := e2ethanos.NewPrometheus(e, "2", defaultPromConfig("prom2", 0, e2ethanos.RemoteWriteEndpoint(r2.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) - 
prom3, _, err := e2ethanos.NewPrometheus(e, "3", defaultPromConfig("prom3", 0, e2ethanos.RemoteWriteEndpoint(r3.InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom3, _, err := e2ethanos.NewPrometheus(e, "3", defaultPromConfig("prom3", 0, e2ethanos.RemoteWriteEndpoint(r3.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1, prom2, prom3)) - q, err := e2ethanos.NewQuerierBuilder(e, "1", r1.InternalEndpoint("grpc"), r2.InternalEndpoint("grpc"), r3.InternalEndpoint("grpc")).Build() + q, err := e2ethanos.NewQuerierBuilder(e, "1", r1.Future().InternalEndpoint("grpc"), r2.Future().InternalEndpoint("grpc"), r3.Future().InternalEndpoint("grpc")).Build() testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(q)) @@ -418,9 +418,9 @@ func TestReceive(t *testing.T) { h := receive.HashringConfig{ Endpoints: []string{ - r1.InternalEndpoint("grpc"), - r2.InternalEndpoint("grpc"), - r3.InternalEndpoint("grpc"), + r1.Future().InternalEndpoint("grpc"), + r2.Future().InternalEndpoint("grpc"), + r3.Future().InternalEndpoint("grpc"), }, } @@ -434,15 +434,15 @@ func TestReceive(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(r1Runnable, r2Runnable, r3Runnable)) - prom1, _, err := e2ethanos.NewPrometheus(e, "1", defaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom1, _, err := e2ethanos.NewPrometheus(e, "1", defaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) - prom2, _, err := e2ethanos.NewPrometheus(e, "2", defaultPromConfig("prom2", 0, e2ethanos.RemoteWriteEndpoint(r2.InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom2, _, err := e2ethanos.NewPrometheus(e, "2", 
defaultPromConfig("prom2", 0, e2ethanos.RemoteWriteEndpoint(r2.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) - prom3, _, err := e2ethanos.NewPrometheus(e, "3", defaultPromConfig("prom3", 0, e2ethanos.RemoteWriteEndpoint(r3.InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom3, _, err := e2ethanos.NewPrometheus(e, "3", defaultPromConfig("prom3", 0, e2ethanos.RemoteWriteEndpoint(r3.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1, prom2, prom3)) - q, err := e2ethanos.NewQuerierBuilder(e, "1", r1.InternalEndpoint("grpc"), r2.InternalEndpoint("grpc"), r3.InternalEndpoint("grpc")).Build() + q, err := e2ethanos.NewQuerierBuilder(e, "1", r1.Future().InternalEndpoint("grpc"), r2.Future().InternalEndpoint("grpc"), r3.Future().InternalEndpoint("grpc")).Build() testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(q)) @@ -496,9 +496,9 @@ func TestReceive(t *testing.T) { h := receive.HashringConfig{ Endpoints: []string{ - r1.InternalEndpoint("grpc"), - r2.InternalEndpoint("grpc"), - r3.InternalEndpoint("grpc"), + r1.Future().InternalEndpoint("grpc"), + r2.Future().InternalEndpoint("grpc"), + r3.Future().InternalEndpoint("grpc"), }, } @@ -511,11 +511,11 @@ func TestReceive(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(r1Runnable, r2Runnable, r3Runnable)) - prom1, _, err := e2ethanos.NewPrometheus(e, "1", defaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom1, _, err := e2ethanos.NewPrometheus(e, "1", defaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1)) - q, err := e2ethanos.NewQuerierBuilder(e, 
"1", r1.InternalEndpoint("grpc"), r2.InternalEndpoint("grpc"), r3.InternalEndpoint("grpc")).Build() + q, err := e2ethanos.NewQuerierBuilder(e, "1", r1.Future().InternalEndpoint("grpc"), r2.Future().InternalEndpoint("grpc"), r3.Future().InternalEndpoint("grpc")).Build() testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(q)) @@ -568,9 +568,9 @@ func TestReceive(t *testing.T) { h := receive.HashringConfig{ Endpoints: []string{ - r1.InternalEndpoint("grpc"), - r2.InternalEndpoint("grpc"), - r3.InternalEndpoint("grpc"), + r1.Future().InternalEndpoint("grpc"), + r2.Future().InternalEndpoint("grpc"), + r3.Future().InternalEndpoint("grpc"), }, } @@ -581,11 +581,11 @@ func TestReceive(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(r1Runnable, r2Runnable)) - prom1, _, err := e2ethanos.NewPrometheus(e, "1", defaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom1, _, err := e2ethanos.NewPrometheus(e, "1", defaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1)) - q, err := e2ethanos.NewQuerierBuilder(e, "1", r1.InternalEndpoint("grpc"), r2.InternalEndpoint("grpc")).Build() + q, err := e2ethanos.NewQuerierBuilder(e, "1", r1.Future().InternalEndpoint("grpc"), r2.Future().InternalEndpoint("grpc")).Build() testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(q)) @@ -625,7 +625,7 @@ func TestReceive(t *testing.T) { h := receive.HashringConfig{ Endpoints: []string{ - r1.InternalEndpoint("grpc"), + r1.Future().InternalEndpoint("grpc"), }, } @@ -634,9 +634,9 @@ func TestReceive(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(r1Runnable)) - rp1, err := e2ethanos.NewReverseProxy(e, "1", "tenant-1", "http://"+r1.InternalEndpoint("remote-write")) + rp1, err := 
e2ethanos.NewReverseProxy(e, "1", "tenant-1", "http://"+r1.Future().InternalEndpoint("remote-write")) testutil.Ok(t, err) - rp2, err := e2ethanos.NewReverseProxy(e, "2", "tenant-2", "http://"+r1.InternalEndpoint("remote-write")) + rp2, err := e2ethanos.NewReverseProxy(e, "2", "tenant-2", "http://"+r1.Future().InternalEndpoint("remote-write")) testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(rp1, rp2)) @@ -646,7 +646,7 @@ func TestReceive(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1, prom2)) - q, err := e2ethanos.NewQuerierBuilder(e, "1", r1.InternalEndpoint("grpc")).Build() + q, err := e2ethanos.NewQuerierBuilder(e, "1", r1.Future().InternalEndpoint("grpc")).Build() testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(q)) ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) diff --git a/test/e2e/rule_test.go b/test/e2e/rule_test.go index dfffc57c782..a256e315ae4 100644 --- a/test/e2e/rule_test.go +++ b/test/e2e/rule_test.go @@ -152,7 +152,7 @@ func reloadRulesHTTP(t *testing.T, ctx context.Context, endpoint string) { testutil.Equals(t, 200, resp.StatusCode) } -func reloadRulesSignal(t *testing.T, r *e2e.InstrumentedRunnable) { +func reloadRulesSignal(t *testing.T, r e2e.InstrumentedRunnable) { c := e2e.NewCommand("kill", "-1", "1") _, _, err := r.Exec(c) testutil.Ok(t, err) diff --git a/test/e2e/rules_api_test.go b/test/e2e/rules_api_test.go index db551dd8c16..31a530fe472 100644 --- a/test/e2e/rules_api_test.go +++ b/test/e2e/rules_api_test.go @@ -71,7 +71,7 @@ func TestRulesAPI_Fanout(t *testing.T) { queryCfg := []httpconfig.Config{ { EndpointsConfig: httpconfig.EndpointsConfig{ - StaticAddresses: []string{qUninit.InternalEndpoint("http")}, + StaticAddresses: []string{qUninit.Future().InternalEndpoint("http")}, Scheme: "http", }, }, diff --git a/test/e2e/store_gateway_test.go b/test/e2e/store_gateway_test.go index 13eb6d07ef0..8c034b7a2f6 100644 --- a/test/e2e/store_gateway_test.go +++ 
b/test/e2e/store_gateway_test.go @@ -476,7 +476,7 @@ metafile_content_ttl: 0s` // Wait for store to sync blocks. // thanos_blocks_meta_synced: 1x loadedMeta 0x labelExcludedMeta 0x TooFreshMeta. - for _, st := range []*e2e.InstrumentedRunnable{store1, store2, store3} { + for _, st := range []e2e.InstrumentedRunnable{store1, store2, store3} { t.Run(st.Name(), func(t *testing.T) { testutil.Ok(t, st.WaitSumMetrics(e2e.Equals(1), "thanos_blocks_meta_synced")) testutil.Ok(t, st.WaitSumMetrics(e2e.Equals(0), "thanos_blocks_meta_sync_failures_total")) @@ -502,7 +502,7 @@ metafile_content_ttl: 0s` }, ) - for _, st := range []*e2e.InstrumentedRunnable{store1, store2, store3} { + for _, st := range []e2e.InstrumentedRunnable{store1, store2, store3} { testutil.Ok(t, st.WaitSumMetricsWithOptions(e2e.Greater(0), []string{`thanos_cache_groupcache_loads_total`})) testutil.Ok(t, st.WaitSumMetricsWithOptions(e2e.Greater(0), []string{`thanos_store_bucket_cache_operation_hits_total`}, e2e.WithLabelMatchers(matchers.MustNewMatcher(matchers.MatchEqual, "config", "chunks")))) } From 34e6527fba44fe086836b4b03b11521b89e25825 Mon Sep 17 00:00:00 2001 From: Matej Gera <38492574+matej-g@users.noreply.github.com> Date: Wed, 2 Mar 2022 15:37:59 +0100 Subject: [PATCH 04/24] Store: Fix data race in advertised label set in bucket store (#5204) * Fix info data race Signed-off-by: Matej Gera * Fix lint Signed-off-by: Matej Gera --- pkg/info/info.go | 37 +++++++++---------------------------- pkg/store/bucket.go | 6 +++--- test/e2e/query_test.go | 2 +- 3 files changed, 13 insertions(+), 32 deletions(-) diff --git a/pkg/info/info.go b/pkg/info/info.go index 4c4beee5045..40df172a68f 100644 --- a/pkg/info/info.go +++ b/pkg/info/info.go @@ -35,6 +35,13 @@ func NewInfoServer( ) *InfoServer { srv := &InfoServer{ component: component, + // By default, do not return info for any API. 
+ getLabelSet: func() []labelpb.ZLabelSet { return nil }, + getStoreInfo: func() *infopb.StoreInfo { return nil }, + getExemplarsInfo: func() *infopb.ExemplarsInfo { return nil }, + getRulesInfo: func() *infopb.RulesInfo { return nil }, + getTargetsInfo: func() *infopb.TargetsInfo { return nil }, + getMetricMetadataInfo: func() *infopb.MetricMetadataInfo { return nil }, } for _, o := range options { @@ -146,31 +153,7 @@ func RegisterInfoServer(infoSrv infopb.InfoServer) func(*grpc.Server) { // Info returns the information about label set and available APIs exposed by the component. func (srv *InfoServer) Info(ctx context.Context, req *infopb.InfoRequest) (*infopb.InfoResponse, error) { - if srv.getLabelSet == nil { - srv.getLabelSet = func() []labelpb.ZLabelSet { return nil } - } - - if srv.getStoreInfo == nil { - srv.getStoreInfo = func() *infopb.StoreInfo { return nil } - } - - if srv.getExemplarsInfo == nil { - srv.getExemplarsInfo = func() *infopb.ExemplarsInfo { return nil } - } - - if srv.getRulesInfo == nil { - srv.getRulesInfo = func() *infopb.RulesInfo { return nil } - } - - if srv.getTargetsInfo == nil { - srv.getTargetsInfo = func() *infopb.TargetsInfo { return nil } - } - - if srv.getMetricMetadataInfo == nil { - srv.getMetricMetadataInfo = func() *infopb.MetricMetadataInfo { return nil } - } - - resp := &infopb.InfoResponse{ + return &infopb.InfoResponse{ LabelSets: srv.getLabelSet(), ComponentType: srv.component, Store: srv.getStoreInfo(), @@ -178,7 +161,5 @@ func (srv *InfoServer) Info(ctx context.Context, req *infopb.InfoRequest) (*info Rules: srv.getRulesInfo(), Targets: srv.getTargetsInfo(), MetricMetadata: srv.getMetricMetadataInfo(), - } - - return resp, nil + }, nil } diff --git a/pkg/store/bucket.go b/pkg/store/bucket.go index 0be444d24ab..679c245fe67 100644 --- a/pkg/store/bucket.go +++ b/pkg/store/bucket.go @@ -684,7 +684,9 @@ func (s *BucketStore) TimeRange() (mint, maxt int64) { } func (s *BucketStore) LabelSet() []labelpb.ZLabelSet { + 
s.mtx.RLock() labelSets := s.advLabelSets + s.mtx.RUnlock() if s.enableCompatibilityLabel && len(labelSets) > 0 { labelSets = append(labelSets, labelpb.ZLabelSet{Labels: []labelpb.ZLabel{{Name: CompatibilityTypeLabelName, Value: "store"}}}) @@ -700,11 +702,9 @@ func (s *BucketStore) Info(context.Context, *storepb.InfoRequest) (*storepb.Info StoreType: component.Store.ToProto(), MinTime: mint, MaxTime: maxt, + LabelSets: s.LabelSet(), } - s.mtx.RLock() - res.LabelSets = s.LabelSet() - s.mtx.RUnlock() return res, nil } diff --git a/test/e2e/query_test.go b/test/e2e/query_test.go index 089750dbed7..e34ec52fb4f 100644 --- a/test/e2e/query_test.go +++ b/test/e2e/query_test.go @@ -120,7 +120,7 @@ func sortResults(res model.Vector) { func TestSidecarNotReady(t *testing.T) { t.Parallel() - e, err := e2e.NewDockerEnvironment("e2e_test_query") + e, err := e2e.NewDockerEnvironment("e2e_test_query_sidecar_not_ready") testutil.Ok(t, err) t.Cleanup(e2ethanos.CleanScenario(t, e)) From 4af52f231fc57b624db80a02bce93f4ec73d8012 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Thu, 3 Mar 2022 00:17:18 -0800 Subject: [PATCH 05/24] Append ruler labels as external labels when using stateless ruler mode (#5205) * append ruler labels as external labels when using stateless ruler mode Signed-off-by: Ben Ye * update changelog Signed-off-by: Ben Ye --- CHANGELOG.md | 1 + cmd/thanos/rule.go | 4 +++- test/e2e/rule_test.go | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 848819051f1..0bd063c59e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ### Fixed ### Changed +- [#5205](https://github.com/thanos-io/thanos/pull/5205) Rule: Add ruler labels as external labels in stateless ruler mode. 
### Removed diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index dd9256c95e2..47414f495f0 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -363,7 +363,9 @@ func runRule( return 0, nil }, walDir, 1*time.Minute, nil) if err := remoteStore.ApplyConfig(&config.Config{ - GlobalConfig: config.DefaultGlobalConfig, + GlobalConfig: config.GlobalConfig{ + ExternalLabels: labelsTSDBToProm(conf.lset), + }, RemoteWriteConfigs: rwCfg.RemoteWriteConfigs, }); err != nil { return errors.Wrap(err, "applying config to remote storage") diff --git a/test/e2e/rule_test.go b/test/e2e/rule_test.go index a256e315ae4..f4e1f6f2c01 100644 --- a/test/e2e/rule_test.go +++ b/test/e2e/rule_test.go @@ -539,12 +539,14 @@ func TestRule_CanRemoteWriteData(t *testing.T) { "__name__": "test_absent_metric", "job": "thanos-receive", "receive": "1", + "replica": "1", "tenant_id": "default-tenant", }, { "__name__": "test_absent_metric", "job": "thanos-receive", "receive": "2", + "replica": "1", "tenant_id": "default-tenant", }, }) From 4955c01eb563e1d6377cacfab19f329a176c8a10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Thu, 3 Mar 2022 12:05:21 +0200 Subject: [PATCH 06/24] cache: add timeout for groupcache's fetch operation (#5206) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * cache: add timeout for groupcache's fetch operation Add a timeout for groupcache's fetch operation. It is useful when there are network errors - if loading from a peer fails then we still might have a chance to be able to load data from remote object storage ourselves. 
Signed-off-by: Giedrius Statkevičius * CHANGELOG: add entry Signed-off-by: Giedrius Statkevičius * cache: add yaml tag for new field Signed-off-by: Giedrius Statkevičius * cache: bump default timeout, improve docs Signed-off-by: Giedrius Statkevičius * docs: make changes according to Matej's suggestions Signed-off-by: Giedrius Statkevičius --- CHANGELOG.md | 2 ++ docs/components/store.md | 3 +++ pkg/cache/groupcache.go | 12 ++++++++++++ 3 files changed, 17 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0bd063c59e6..83e9e0fe636 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,9 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ### Fixed ### Changed + - [#5205](https://github.com/thanos-io/thanos/pull/5205) Rule: Add ruler labels as external labels in stateless ruler mode. +- [#5206](https://github.com/thanos-io/thanos/pull/5206) Cache: add timeout for groupcache's fetch operation ### Removed diff --git a/docs/components/store.md b/docs/components/store.md index 73fbee210ac..1162ad99461 100644 --- a/docs/components/store.md +++ b/docs/components/store.md @@ -429,6 +429,7 @@ config: - http://10.123.22.100:8080 groupcache_group: test_group dns_interval: 1s + timeout: 2s ``` In this case, three Thanos Store nodes are running in the same group meaning that they all point to the same remote object storage. @@ -441,6 +442,8 @@ In the `peers` section it is possible to use the prefix form to automatically lo Note that there must be no trailing slash in the `peers` configuration i.e. one of the strings must be identical to `self_url` and others should have the same form. Without this, loading data from peers may fail. +If timeout is set to zero then there is no timeout for fetching and fetching's lifetime is equal to the lifetime to the original request's lifetime. It is recommended to keep it higher than zero. 
It is generally preferred to keep this value higher because the fetching operation potentially includes loading of data from remote object storage. + ## Index Header In order to query series inside blocks from object storage, Store Gateway has to know certain initial info from each block index. In order to achieve so, on startup the Gateway builds an `index-header` for each block and stores it on local disk; such `index-header` is build by downloading specific pieces of original block's index, stored on local disk and then mmaped and used by Store Gateway. diff --git a/pkg/cache/groupcache.go b/pkg/cache/groupcache.go index 19c8c70a867..81d11504ac3 100644 --- a/pkg/cache/groupcache.go +++ b/pkg/cache/groupcache.go @@ -36,6 +36,7 @@ type Groupcache struct { galaxy *galaxycache.Galaxy universe *galaxycache.Universe logger log.Logger + timeout time.Duration } // GroupcacheConfig holds the in-memory cache config. @@ -59,6 +60,9 @@ type GroupcacheConfig struct { // How often we should resolve the addresses. DNSInterval time.Duration `yaml:"dns_interval"` + + // Timeout specifies the read/write timeout. 
+ Timeout time.Duration `yaml:"timeout"` } var ( @@ -66,6 +70,7 @@ var ( MaxSize: 250 * 1024 * 1024, DNSSDResolver: dns.GolangResolverType, DNSInterval: 1 * time.Minute, + Timeout: 2 * time.Second, } ) @@ -255,6 +260,7 @@ func NewGroupcacheWithConfig(logger log.Logger, reg prometheus.Registerer, conf logger: logger, galaxy: galaxy, universe: universe, + timeout: conf.Timeout, }, nil } @@ -265,6 +271,12 @@ func (c *Groupcache) Store(ctx context.Context, data map[string][]byte, ttl time func (c *Groupcache) Fetch(ctx context.Context, keys []string) map[string][]byte { data := map[string][]byte{} + if c.timeout != 0 { + timeoutCtx, cancel := context.WithTimeout(ctx, c.timeout) + ctx = timeoutCtx + defer cancel() + } + for _, k := range keys { codec := galaxycache.ByteCodec{} From c213f73a56894546677d7114c1ac29da86e4e567 Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Sat, 5 Mar 2022 12:48:27 +0100 Subject: [PATCH 07/24] website: Add Ukraine support banner. (#5214) Signed-off-by: Bartlomiej Plotka --- website/layouts/index.html | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/website/layouts/index.html b/website/layouts/index.html index a44b3b8aad7..389676887f1 100644 --- a/website/layouts/index.html +++ b/website/layouts/index.html @@ -43,6 +43,13 @@

Open source, highly available Prometheus setup with long term s +
+ The Thanos Team strongly condemns Putin's illegal invasion of Ukraine. + Please consider donating to a humanitarian aid actions such as + Polish National Campaign to Support Ukraine + or + Care in Action to provide relief. +
From 4e2461a0d73100046f312f0f29013d898bffc2b5 Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Sat, 5 Mar 2022 13:05:10 +0100 Subject: [PATCH 08/24] website: Hot fix to banner. Signed-off-by: Bartlomiej Plotka --- website/layouts/index.html | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/website/layouts/index.html b/website/layouts/index.html index 389676887f1..3f35af42e63 100644 --- a/website/layouts/index.html +++ b/website/layouts/index.html @@ -43,12 +43,11 @@

Open source, highly available Prometheus setup with long term s

-
- The Thanos Team strongly condemns Putin's illegal invasion of Ukraine. +
The Thanos Team strongly condemns Putin's illegal invasion of Ukraine. Please consider donating to a humanitarian aid actions such as - Polish National Campaign to Support Ukraine + Polish National Campaign to Support Ukraine or - Care in Action to provide relief. + Care in Action to provide relief.
From e60ca7ea0a73dad9f283f641216b05e78a99116b Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Sat, 5 Mar 2022 13:43:29 +0100 Subject: [PATCH 09/24] website: Fixing banner. (#5215) Signed-off-by: Bartlomiej Plotka --- website/layouts/index.html | 6 +++--- website/static/main.css | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/website/layouts/index.html b/website/layouts/index.html index 3f35af42e63..198d250cfea 100644 --- a/website/layouts/index.html +++ b/website/layouts/index.html @@ -43,11 +43,11 @@

Open source, highly available Prometheus setup with long term s

-
The Thanos Team strongly condemns Putin's illegal invasion of Ukraine. +
The Thanos Team strongly condemns Putin's illegal invasion of Ukraine. Please consider donating to a humanitarian aid actions such as - Polish National Campaign to Support Ukraine + Polish National Campaign to Support Ukraine or - Care in Action to provide relief. + Care in Action to provide relief.
diff --git a/website/static/main.css b/website/static/main.css index 0142e2fd486..c76171a51bc 100644 --- a/website/static/main.css +++ b/website/static/main.css @@ -1,3 +1,7 @@ +.color-yellow { + color: #ad9c26; +} + .bg-purple { background-color: #6D41FF; } From 0c067eb0de1ee1b3fd9eed284b8f2876d72e528c Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Mon, 7 Mar 2022 23:49:33 -0800 Subject: [PATCH 10/24] Run downsample tool continuously (#5218) * run downsample tool continuously Signed-off-by: Ben Ye * update changelog Signed-off-by: Ben Ye --- CHANGELOG.md | 3 ++- cmd/thanos/downsample.go | 48 ++++++++++++++++++++------------------ cmd/thanos/tools_bucket.go | 6 ++++- docs/components/tools.md | 1 + 4 files changed, 33 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83e9e0fe636..df11b95e102 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,8 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ### Changed - [#5205](https://github.com/thanos-io/thanos/pull/5205) Rule: Add ruler labels as external labels in stateless ruler mode. -- [#5206](https://github.com/thanos-io/thanos/pull/5206) Cache: add timeout for groupcache's fetch operation +- [#5206](https://github.com/thanos-io/thanos/pull/5206) Cache: add timeout for groupcache's fetch operation. +- [#5218](https://github.com/thanos-io/thanos/pull/5218) Tools: Run bucket downsample tools continuously. 
### Removed diff --git a/cmd/thanos/downsample.go b/cmd/thanos/downsample.go index 3537028268b..bd299c1e3e3 100644 --- a/cmd/thanos/downsample.go +++ b/cmd/thanos/downsample.go @@ -69,6 +69,7 @@ func RunDownsample( httpTLSConfig string, httpGracePeriod time.Duration, dataDir string, + waitInterval time.Duration, downsampleConcurrency int, objStoreConfig *extflag.PathOrContent, comp component.Component, @@ -113,31 +114,32 @@ func RunDownsample( defer runutil.CloseWithLogOnErr(logger, bkt, "bucket client") statusProber.Ready() - level.Info(logger).Log("msg", "start first pass of downsampling") - metas, _, err := metaFetcher.Fetch(ctx) - if err != nil { - return errors.Wrap(err, "sync before first pass of downsampling") - } - - for _, meta := range metas { - groupKey := meta.Thanos.GroupKey() - metrics.downsamples.WithLabelValues(groupKey) - metrics.downsampleFailures.WithLabelValues(groupKey) - } - if err := downsampleBucket(ctx, logger, metrics, bkt, metas, dataDir, downsampleConcurrency, hashFunc); err != nil { - return errors.Wrap(err, "downsampling failed") - } + return runutil.Repeat(waitInterval, ctx.Done(), func() error { + level.Info(logger).Log("msg", "start first pass of downsampling") + metas, _, err := metaFetcher.Fetch(ctx) + if err != nil { + return errors.Wrap(err, "sync before first pass of downsampling") + } - level.Info(logger).Log("msg", "start second pass of downsampling") - metas, _, err = metaFetcher.Fetch(ctx) - if err != nil { - return errors.Wrap(err, "sync before second pass of downsampling") - } - if err := downsampleBucket(ctx, logger, metrics, bkt, metas, dataDir, downsampleConcurrency, hashFunc); err != nil { - return errors.Wrap(err, "downsampling failed") - } + for _, meta := range metas { + groupKey := meta.Thanos.GroupKey() + metrics.downsamples.WithLabelValues(groupKey) + metrics.downsampleFailures.WithLabelValues(groupKey) + } + if err := downsampleBucket(ctx, logger, metrics, bkt, metas, dataDir, downsampleConcurrency, hashFunc); 
err != nil { + return errors.Wrap(err, "downsampling failed") + } - return nil + level.Info(logger).Log("msg", "start second pass of downsampling") + metas, _, err = metaFetcher.Fetch(ctx) + if err != nil { + return errors.Wrap(err, "sync before second pass of downsampling") + } + if err := downsampleBucket(ctx, logger, metrics, bkt, metas, dataDir, downsampleConcurrency, hashFunc); err != nil { + return errors.Wrap(err, "downsampling failed") + } + return nil + }) }, func(error) { cancel() }) diff --git a/cmd/thanos/tools_bucket.go b/cmd/thanos/tools_bucket.go index d9d612362ef..6964b4c71b8 100644 --- a/cmd/thanos/tools_bucket.go +++ b/cmd/thanos/tools_bucket.go @@ -129,6 +129,7 @@ type bucketReplicateConfig struct { } type bucketDownsampleConfig struct { + waitInterval time.Duration downsampleConcurrency int dataDir string hashFunc string @@ -224,6 +225,8 @@ func (tbc *bucketRewriteConfig) registerBucketRewriteFlag(cmd extkingpin.FlagCla } func (tbc *bucketDownsampleConfig) registerBucketDownsampleFlag(cmd extkingpin.FlagClause) *bucketDownsampleConfig { + cmd.Flag("wait-interval", "Wait interval between downsample runs."). + Default("5m").DurationVar(&tbc.waitInterval) cmd.Flag("downsample.concurrency", "Number of goroutines to use when downsampling blocks."). Default("1").IntVar(&tbc.downsampleConcurrency) cmd.Flag("data-dir", "Data directory in which to cache blocks and process downsamplings."). @@ -747,7 +750,8 @@ func registerBucketDownsample(app extkingpin.AppClause, objStoreConfig *extflag. 
tbc.registerBucketDownsampleFlag(cmd) cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ <-chan struct{}, _ bool) error { - return RunDownsample(g, logger, reg, *httpAddr, *httpTLSConfig, time.Duration(*httpGracePeriod), tbc.dataDir, tbc.downsampleConcurrency, objStoreConfig, component.Downsample, metadata.HashFunc(tbc.hashFunc)) + return RunDownsample(g, logger, reg, *httpAddr, *httpTLSConfig, time.Duration(*httpGracePeriod), tbc.dataDir, + tbc.waitInterval, tbc.downsampleConcurrency, objStoreConfig, component.Downsample, metadata.HashFunc(tbc.hashFunc)) }) } diff --git a/docs/components/tools.md b/docs/components/tools.md index efa9dc475b0..80ceed95859 100644 --- a/docs/components/tools.md +++ b/docs/components/tools.md @@ -652,6 +652,7 @@ Flags: format details: https://thanos.io/tip/thanos/tracing.md/#configuration --version Show application version. + --wait-interval=5m Wait interval between downsample runs. ``` From 9ed8ed7b059d98576b5696f1a0cac56459fa426d Mon Sep 17 00:00:00 2001 From: nicholaswang Date: Tue, 8 Mar 2022 16:16:14 +0800 Subject: [PATCH 11/24] unify and tidy objstore setup (#5216) * unify and tidy objstore setup Signed-off-by: Nicholaswang * fix format in oss.go Signed-off-by: Nicholaswang * fix typo in objstore/gcs Signed-off-by: Nicholaswang * fix typo in objstore/gcs Signed-off-by: Nicholaswang * retrigger ci check Signed-off-by: Nicholaswang --- pkg/objstore/azure/azure.go | 5 +++++ pkg/objstore/bos/bos.go | 5 +++++ pkg/objstore/gcs/gcs.go | 1 + pkg/objstore/oss/oss.go | 22 +++++++++++++++++++--- 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/pkg/objstore/azure/azure.go b/pkg/objstore/azure/azure.go index f930254a0ee..f79b7df2379 100644 --- a/pkg/objstore/azure/azure.go +++ b/pkg/objstore/azure/azure.go @@ -182,6 +182,11 @@ func NewBucket(logger log.Logger, azureConfig []byte, component string) (*Bucket return nil, err } + return NewBucketWithConfig(logger, conf, component) 
+} + +// NewBucketWithConfig returns a new Bucket using the provided Azure config struct. +func NewBucketWithConfig(logger log.Logger, conf Config, component string) (*Bucket, error) { if err := conf.validate(); err != nil { return nil, err } diff --git a/pkg/objstore/bos/bos.go b/pkg/objstore/bos/bos.go index 18c70f791ff..a34f3eccdaa 100644 --- a/pkg/objstore/bos/bos.go +++ b/pkg/objstore/bos/bos.go @@ -75,6 +75,11 @@ func NewBucket(logger log.Logger, conf []byte, component string) (*Bucket, error return nil, errors.Wrap(err, "parsing BOS configuration") } + return NewBucketWithConfig(logger, config, component) +} + +// NewBucketWithConfig returns a new Bucket using the provided bos config struct. +func NewBucketWithConfig(logger log.Logger, config Config, component string) (*Bucket, error) { if err := config.validate(); err != nil { return nil, errors.Wrap(err, "validating BOS configuration") } diff --git a/pkg/objstore/gcs/gcs.go b/pkg/objstore/gcs/gcs.go index 0bc5a947a48..ce93f42c0c3 100644 --- a/pkg/objstore/gcs/gcs.go +++ b/pkg/objstore/gcs/gcs.go @@ -52,6 +52,7 @@ func NewBucket(ctx context.Context, logger log.Logger, conf []byte, component st return NewBucketWithConfig(ctx, logger, gc, component) } +// NewBucketWithConfig returns a new Bucket with gcs Config struct. 
func NewBucketWithConfig(ctx context.Context, logger log.Logger, gc Config, component string) (*Bucket, error) { if gc.Bucket == "" { return nil, errors.New("missing Google Cloud Storage bucket name for stored blocks") diff --git a/pkg/objstore/oss/oss.go b/pkg/objstore/oss/oss.go index 4c77cad1ec4..5e96c3ddf10 100644 --- a/pkg/objstore/oss/oss.go +++ b/pkg/objstore/oss/oss.go @@ -164,9 +164,13 @@ func NewBucket(logger log.Logger, conf []byte, component string) (*Bucket, error return nil, errors.Wrap(err, "parse aliyun oss config file failed") } - if config.Endpoint == "" || config.Bucket == "" || config.AccessKeyID == "" || config.AccessKeySecret == "" { - return nil, errors.New("aliyun oss endpoint or bucket or access_key_id or access_key_secret " + - "is not present in config file") + return NewBucketWithConfig(logger, config, component) +} + +// NewBucketWithConfig returns a new Bucket using the provided oss config struct. +func NewBucketWithConfig(logger log.Logger, config Config, component string) (*Bucket, error) { + if err := validate(config); err != nil { + return nil, err } client, err := alioss.New(config.Endpoint, config.AccessKeyID, config.AccessKeySecret) @@ -188,6 +192,18 @@ func NewBucket(logger log.Logger, conf []byte, component string) (*Bucket, error return bkt, nil } +// validate checks to see the config options are set. +func validate(config Config) error { + if config.Endpoint == "" || config.Bucket == "" { + return errors.New("aliyun oss endpoint or bucket is not present in config file") + } + if config.AccessKeyID == "" || config.AccessKeySecret == "" { + return errors.New("aliyun oss access_key_id or access_key_secret is not present in config file") + } + + return nil +} + // Iter calls f for each entry in the given directory (not recursive). The argument to f is the full // object name including the prefix of the inspected directory. 
func (b *Bucket) Iter(ctx context.Context, dir string, f func(string) error, options ...objstore.IterOption) error { From 122ca4e9ca3987f4a064ee9c96c53187aa9d6dc5 Mon Sep 17 00:00:00 2001 From: David Koller <58932443+appit-online@users.noreply.github.com> Date: Wed, 9 Mar 2022 13:51:21 +0100 Subject: [PATCH 12/24] Change Server TLS Min Version to 1.3 (#5170) Signed-off-by: David Koller --- pkg/tls/options.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/tls/options.go b/pkg/tls/options.go index 2aebb825e41..8d5c134e7e6 100644 --- a/pkg/tls/options.go +++ b/pkg/tls/options.go @@ -35,7 +35,7 @@ func NewServerConfig(logger log.Logger, cert, key, clientCA string) (*tls.Config } tlsCfg := &tls.Config{ - MinVersion: tls.VersionTLS12, + MinVersion: tls.VersionTLS13, } mngr := &serverTLSManager{ From 3026e58fac4f670868ec4579bd8ce70226968a35 Mon Sep 17 00:00:00 2001 From: Pablo RUTH Date: Wed, 9 Mar 2022 22:11:23 +0100 Subject: [PATCH 13/24] receive: remove sort on label hashing (#5224) * Remove pre-sort on label hashing Signed-off-by: Pablo RUTH * Add CHANGELOG entry Signed-off-by: Pablo RUTH --- CHANGELOG.md | 1 + pkg/receive/hashring.go | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index df11b95e102..5d42b58784e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#5205](https://github.com/thanos-io/thanos/pull/5205) Rule: Add ruler labels as external labels in stateless ruler mode. - [#5206](https://github.com/thanos-io/thanos/pull/5206) Cache: add timeout for groupcache's fetch operation. - [#5218](https://github.com/thanos-io/thanos/pull/5218) Tools: Run bucket downsample tools continuously. 
+- [#5224](https://github.com/thanos-io/thanos/pull/5224) Receive: Remove sort on label hashing ### Removed diff --git a/pkg/receive/hashring.go b/pkg/receive/hashring.go index 8a72e0f96b5..520a610d0fe 100644 --- a/pkg/receive/hashring.go +++ b/pkg/receive/hashring.go @@ -6,7 +6,6 @@ package receive import ( "context" "fmt" - "sort" "sync" "github.com/pkg/errors" @@ -67,8 +66,6 @@ func (s simpleHashring) GetN(tenant string, ts *prompb.TimeSeries, n uint64) (st return "", &insufficientNodesError{have: uint64(len(s)), want: n + 1} } - sort.Slice(ts.Labels, func(i, j int) bool { return ts.Labels[i].Name < ts.Labels[j].Name }) - return s[(labelpb.HashWithPrefix(tenant, ts.Labels)+n)%uint64(len(s))], nil } From 6eb5ce6f78047cee625b96e787667cd60c4593b4 Mon Sep 17 00:00:00 2001 From: Casi <47807658+lcasi@users.noreply.github.com> Date: Sun, 13 Mar 2022 09:10:11 +0800 Subject: [PATCH 14/24] support forward-header (#5220) Signed-off-by: lcasi --- CHANGELOG.md | 4 +++ cmd/thanos/query_frontend.go | 2 ++ docs/components/query-frontend.md | 16 +++++++++++ pkg/queryfrontend/config.go | 1 + pkg/queryfrontend/labels_codec.go | 41 +++++++++++++++++++++++---- pkg/queryfrontend/queryrange_codec.go | 16 +++++++++-- pkg/queryfrontend/request.go | 8 ++++++ pkg/queryfrontend/roundtrip.go | 10 ++++--- 8 files changed, 87 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d42b58784e..2b21adc101a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,10 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ### Fixed +### Added + +- [#5220](https://github.com/thanos-io/thanos/pull/5220) Query Frontend: Add `--query-frontend.forward-header` flag, forward headers to downstream querier. + ### Changed - [#5205](https://github.com/thanos-io/thanos/pull/5205) Rule: Add ruler labels as external labels in stateless ruler mode. 
diff --git a/cmd/thanos/query_frontend.go b/cmd/thanos/query_frontend.go index 007e44a2e53..b6a85d3a5dd 100644 --- a/cmd/thanos/query_frontend.go +++ b/cmd/thanos/query_frontend.go @@ -134,6 +134,8 @@ func registerQueryFrontend(app *extkingpin.App) { "If multiple headers match the request, the first matching arg specified will take precedence. "+ "If no headers match 'anonymous' will be used.").PlaceHolder("").StringsVar(&cfg.orgIdHeaders) + cmd.Flag("query-frontend.forward-header", "List of headers forwarded by the query-frontend to downstream queriers, default is empty").PlaceHolder("").StringsVar(&cfg.ForwardHeaders) + cmd.Flag("log.request.decision", "Deprecation Warning - This flag would be soon deprecated, and replaced with `request.logging-config`. Request Logging for logging the start and end of requests. By default this flag is disabled. LogFinishCall : Logs the finish call of the requests. LogStartAndFinishCall : Logs the start and finish call of the requests. NoLogCall : Disable request logging.").Default("").EnumVar(&cfg.RequestLoggingDecision, "NoLogCall", "LogFinishCall", "LogStartAndFinishCall", "") reqLogConfig := extkingpin.RegisterRequestLoggingFlags(cmd) diff --git a/docs/components/query-frontend.md b/docs/components/query-frontend.md index 41959b558d2..5f6d997cf5b 100644 --- a/docs/components/query-frontend.md +++ b/docs/components/query-frontend.md @@ -148,6 +148,19 @@ Keys which denote a duration are strings that can end with `s` or `m` to indicat You can find the default values [here](https://github.com/thanos-io/thanos/blob/55cb8ca38b3539381dc6a781e637df15c694e50a/pkg/exthttp/transport.go#L12-L27). +## Forward Headers to Downstream Queriers + +`--query-frontend.forward-header` flag provides list of request headers forwarded by query frontend to downstream queriers. 
+ +If downstream queriers need basic authentication to access, we can run query-frontend: + +```bash +thanos query-frontend \ + --http-address "0.0.0.0:9090" \ + --query-frontend.forward-header "Authorization" + --query-frontend.downstream-url=":" +``` + ## Flags ```$ mdox-exec="thanos query-frontend --help" @@ -233,6 +246,9 @@ Flags: --query-frontend.downstream-url="http://localhost:9090" URL of downstream Prometheus Query compatible API. + --query-frontend.forward-header= ... + List of headers forwarded by the query-frontend + to downstream queriers, default is empty --query-frontend.log-queries-longer-than=0 Log queries that are slower than the specified duration. Set to 0 to disable. Set to < 0 to diff --git a/pkg/queryfrontend/config.go b/pkg/queryfrontend/config.go index 7d49bc53f5e..d0b24596b85 100644 --- a/pkg/queryfrontend/config.go +++ b/pkg/queryfrontend/config.go @@ -203,6 +203,7 @@ type Config struct { CacheCompression string RequestLoggingDecision string DownstreamURL string + ForwardHeaders []string } // QueryRangeConfig holds the config for query range tripperware. 
diff --git a/pkg/queryfrontend/labels_codec.go b/pkg/queryfrontend/labels_codec.go index 8596fc3ac4a..342737556b1 100644 --- a/pkg/queryfrontend/labels_codec.go +++ b/pkg/queryfrontend/labels_codec.go @@ -107,7 +107,7 @@ func (c labelsCodec) MergeResponse(responses ...queryrange.Response) (queryrange } } -func (c labelsCodec) DecodeRequest(_ context.Context, r *http.Request, _ []string) (queryrange.Request, error) { +func (c labelsCodec) DecodeRequest(_ context.Context, r *http.Request, forwardHeaders []string) (queryrange.Request, error) { if err := r.ParseForm(); err != nil { return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error()) } @@ -118,9 +118,9 @@ func (c labelsCodec) DecodeRequest(_ context.Context, r *http.Request, _ []strin ) switch op := getOperation(r); op { case labelNamesOp, labelValuesOp: - req, err = c.parseLabelsRequest(r, op) + req, err = c.parseLabelsRequest(r, op, forwardHeaders) case seriesOp: - req, err = c.parseSeriesRequest(r) + req, err = c.parseSeriesRequest(r, forwardHeaders) } if err != nil { return nil, err @@ -167,6 +167,12 @@ func (c labelsCodec) EncodeRequest(ctx context.Context, r queryrange.Request) (* req.Header.Set("Content-Type", "application/x-www-form-urlencoded") } + for _, hv := range thanosReq.Headers { + for _, v := range hv.Values { + req.Header.Add(hv.Name, v) + } + } + case *ThanosSeriesRequest: var params = url.Values{ "start": []string{encodeTime(thanosReq.Start)}, @@ -187,6 +193,11 @@ func (c labelsCodec) EncodeRequest(ctx context.Context, r queryrange.Request) (* return nil, httpgrpc.Errorf(http.StatusBadRequest, "error creating request: %s", err.Error()) } req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + for _, hv := range thanosReq.Headers { + for _, v := range hv.Values { + req.Header.Add(hv.Name, v) + } + } default: return nil, httpgrpc.Errorf(http.StatusInternalServerError, "invalid request format") @@ -271,7 +282,7 @@ func (c labelsCodec) EncodeResponse(ctx context.Context, res 
queryrange.Response return &resp, nil } -func (c labelsCodec) parseLabelsRequest(r *http.Request, op string) (queryrange.Request, error) { +func (c labelsCodec) parseLabelsRequest(r *http.Request, op string, forwardHeaders []string) (queryrange.Request, error) { var ( result ThanosLabelsRequest err error @@ -312,10 +323,20 @@ func (c labelsCodec) parseLabelsRequest(r *http.Request, op string) (queryrange. } } + // Include the specified headers from http request in prometheusRequest. + for _, header := range forwardHeaders { + for h, hv := range r.Header { + if strings.EqualFold(h, header) { + result.Headers = append(result.Headers, &RequestHeader{Name: h, Values: hv}) + break + } + } + } + return &result, nil } -func (c labelsCodec) parseSeriesRequest(r *http.Request) (queryrange.Request, error) { +func (c labelsCodec) parseSeriesRequest(r *http.Request, forwardHeaders []string) (queryrange.Request, error) { var ( result ThanosSeriesRequest err error @@ -358,6 +379,16 @@ func (c labelsCodec) parseSeriesRequest(r *http.Request) (queryrange.Request, er } } + // Include the specified headers from http request in prometheusRequest. 
+ for _, header := range forwardHeaders { + for h, hv := range r.Header { + if strings.EqualFold(h, header) { + result.Headers = append(result.Headers, &RequestHeader{Name: h, Values: hv}) + break + } + } + } + return &result, nil } diff --git a/pkg/queryfrontend/queryrange_codec.go b/pkg/queryfrontend/queryrange_codec.go index fb524a29ce7..a0366e5d623 100644 --- a/pkg/queryfrontend/queryrange_codec.go +++ b/pkg/queryfrontend/queryrange_codec.go @@ -53,7 +53,7 @@ func NewThanosQueryRangeCodec(partialResponse bool) *queryRangeCodec { } } -func (c queryRangeCodec) DecodeRequest(_ context.Context, r *http.Request, _ []string) (queryrange.Request, error) { +func (c queryRangeCodec) DecodeRequest(_ context.Context, r *http.Request, forwardHeaders []string) (queryrange.Request, error) { var ( result ThanosQueryRangeRequest err error @@ -126,6 +126,14 @@ func (c queryRangeCodec) DecodeRequest(_ context.Context, r *http.Request, _ []s } } + for _, header := range forwardHeaders { + for h, hv := range r.Header { + if strings.EqualFold(h, header) { + result.Headers = append(result.Headers, &RequestHeader{Name: h, Values: hv}) + break + } + } + } return &result, nil } @@ -161,7 +169,11 @@ func (c queryRangeCodec) EncodeRequest(ctx context.Context, r queryrange.Request return nil, httpgrpc.Errorf(http.StatusBadRequest, "error creating request: %s", err.Error()) } req.Header.Set("Content-Type", "application/x-www-form-urlencoded") - + for _, hv := range thanosReq.Headers { + for _, v := range hv.Values { + req.Header.Add(hv.Name, v) + } + } return req.WithContext(ctx), nil } diff --git a/pkg/queryfrontend/request.go b/pkg/queryfrontend/request.go index bb11b680d6a..0164eac91fa 100644 --- a/pkg/queryfrontend/request.go +++ b/pkg/queryfrontend/request.go @@ -19,6 +19,11 @@ type ThanosRequest interface { GetStoreMatchers() [][]*labels.Matcher } +type RequestHeader struct { + Name string + Values []string +} + type ThanosQueryRangeRequest struct { Path string Start int64 @@ -33,6 
+38,7 @@ type ThanosQueryRangeRequest struct { ReplicaLabels []string StoreMatchers [][]*labels.Matcher CachingOptions queryrange.CachingOptions + Headers []*RequestHeader } // GetStart returns the start timestamp of the request in milliseconds. @@ -107,6 +113,7 @@ type ThanosLabelsRequest struct { StoreMatchers [][]*labels.Matcher PartialResponse bool CachingOptions queryrange.CachingOptions + Headers []*RequestHeader } // GetStart returns the start timestamp of the request in milliseconds. @@ -178,6 +185,7 @@ type ThanosSeriesRequest struct { Matchers [][]*labels.Matcher StoreMatchers [][]*labels.Matcher CachingOptions queryrange.CachingOptions + Headers []*RequestHeader } // GetStart returns the start timestamp of the request in milliseconds. diff --git a/pkg/queryfrontend/roundtrip.go b/pkg/queryfrontend/roundtrip.go index 570d3153390..405e93c2938 100644 --- a/pkg/queryfrontend/roundtrip.go +++ b/pkg/queryfrontend/roundtrip.go @@ -51,13 +51,13 @@ func NewTripperware(config Config, reg prometheus.Registerer, logger log.Logger) labelsCodec := NewThanosLabelsCodec(config.LabelsConfig.PartialResponseStrategy, config.DefaultTimeRange) queryRangeTripperware, err := newQueryRangeTripperware(config.QueryRangeConfig, queryRangeLimits, queryRangeCodec, - prometheus.WrapRegistererWith(prometheus.Labels{"tripperware": "query_range"}, reg), logger) + prometheus.WrapRegistererWith(prometheus.Labels{"tripperware": "query_range"}, reg), logger, config.ForwardHeaders) if err != nil { return nil, err } labelsTripperware, err := newLabelsTripperware(config.LabelsConfig, labelsLimits, labelsCodec, - prometheus.WrapRegistererWith(prometheus.Labels{"tripperware": "labels"}, reg), logger) + prometheus.WrapRegistererWith(prometheus.Labels{"tripperware": "labels"}, reg), logger, config.ForwardHeaders) if err != nil { return nil, err } @@ -138,6 +138,7 @@ func newQueryRangeTripperware( codec *queryRangeCodec, reg prometheus.Registerer, logger log.Logger, + forwardHeaders []string, ) 
(queryrange.Tripperware, error) { queryRangeMiddleware := []queryrange.Middleware{queryrange.NewLimitsMiddleware(limits)} m := queryrange.NewInstrumentMiddlewareMetrics(reg) @@ -203,7 +204,7 @@ func newQueryRangeTripperware( } return func(next http.RoundTripper) http.RoundTripper { - rt := queryrange.NewRoundTripper(next, codec, nil, queryRangeMiddleware...) + rt := queryrange.NewRoundTripper(next, codec, forwardHeaders, queryRangeMiddleware...) return queryrange.RoundTripFunc(func(r *http.Request) (*http.Response, error) { return rt.RoundTrip(r) }) @@ -218,6 +219,7 @@ func newLabelsTripperware( codec *labelsCodec, reg prometheus.Registerer, logger log.Logger, + forwardHeaders []string, ) (queryrange.Tripperware, error) { labelsMiddleware := []queryrange.Middleware{} m := queryrange.NewInstrumentMiddlewareMetrics(reg) @@ -265,7 +267,7 @@ func newLabelsTripperware( ) } return func(next http.RoundTripper) http.RoundTripper { - rt := queryrange.NewRoundTripper(next, codec, nil, labelsMiddleware...) + rt := queryrange.NewRoundTripper(next, codec, forwardHeaders, labelsMiddleware...) return queryrange.RoundTripFunc(func(r *http.Request) (*http.Response, error) { return rt.RoundTrip(r) }) From b096db19d2fda037b0d372e61f67ba647d64898f Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Mon, 14 Mar 2022 12:48:14 -0700 Subject: [PATCH 15/24] ignore blocks with deletion markers (#5231) * ignore blocks with deletion markers Signed-off-by: Ben Ye * update changelog Signed-off-by: Ben Ye * use fetcher concurrency Signed-off-by: Ben Ye --- CHANGELOG.md | 1 + cmd/thanos/tools_bucket.go | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b21adc101a..44721110d47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#5206](https://github.com/thanos-io/thanos/pull/5206) Cache: add timeout for groupcache's fetch operation. 
- [#5218](https://github.com/thanos-io/thanos/pull/5218) Tools: Run bucket downsample tools continuously. - [#5224](https://github.com/thanos-io/thanos/pull/5224) Receive: Remove sort on label hashing +- [#5231](https://github.com/thanos-io/thanos/pull/5231) Tools: Bucket verify tool ignores blocks with deletion markers. ### Removed diff --git a/cmd/thanos/tools_bucket.go b/cmd/thanos/tools_bucket.go index 6964b4c71b8..959d2a146c5 100644 --- a/cmd/thanos/tools_bucket.go +++ b/cmd/thanos/tools_bucket.go @@ -333,7 +333,9 @@ func registerBucketVerify(app extkingpin.AppClause, objStoreConfig *extflag.Path return err } - fetcher, err := block.NewMetaFetcher(logger, block.FetcherConcurrency, bkt, "", extprom.WrapRegistererWithPrefix(extpromPrefix, reg), nil) + // We ignore any block that has the deletion marker file. + filters := []block.MetadataFilter{block.NewIgnoreDeletionMarkFilter(logger, bkt, 0, block.FetcherConcurrency)} + fetcher, err := block.NewMetaFetcher(logger, block.FetcherConcurrency, bkt, "", extprom.WrapRegistererWithPrefix(extpromPrefix, reg), filters) if err != nil { return err } From 430d8bedf2edeb9d8ef40f69ee10819144d99eeb Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 16 Mar 2022 13:20:55 +0200 Subject: [PATCH 16/24] Updates busybox SHA (#5234) Signed-off-by: GitHub Co-authored-by: yeya24 --- .busybox-versions | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.busybox-versions b/.busybox-versions index e7bfa0395e4..83c5d09653a 100644 --- a/.busybox-versions +++ b/.busybox-versions @@ -1,6 +1,6 @@ # Auto generated by busybox-updater.sh. 
DO NOT EDIT -amd64=42977f138f0655240a4bd4aed4fe1731cff3bc57077ff695ea7cd4653fc1c6e6 -arm64=2f0470d84715de55c3446dd074e954b8d84f887c16fd0bb2de54b3734ba5ae83 -arm=5fb75cf689dcccfc5198aa4cbd7ecf04bc7e44e74220154b4f0f75a7c855318f -ppc64le=a9a9102107c48b12f1e31e722a26d6ad985111b9840d0f72b92e1cce815b83f7 -s390x=9f6a7897398d997568a69d3c5badc9cdc75c71cd0aedc497571e5c6e9635e7db +amd64=43ffc578ff003ad18001c2113b903d236c82160c1a15491ec5f0e9b4268c4f82 +arm64=127fc3a6349a3c038e34553c6bb2b25555fad4efc46ee5e8d6cbf3eb8b8d3481 +arm=00876e80943db090f3310bc6ead8af89c0dfcb4c966a5e04363ab4792042e799 +ppc64le=818b8ead5e414228347fdc75c059b7b35d855e75e1acbe1bad7ccf93a8522a24 +s390x=0595b0070eb8000faaf952fc0001c695e4258009f3079ce7082ceae7a70b716c From 9ec283dbea39be1708401eed2830e40a05f6d46a Mon Sep 17 00:00:00 2001 From: Matej Gera <38492574+matej-g@users.noreply.github.com> Date: Wed, 16 Mar 2022 19:28:55 +0100 Subject: [PATCH 17/24] E2E Tests: Improve Prometheus configurability and make query tests more resilient (#5181) * Move prom config to services; optional self scraping - Move prom config method to shared e2ethanos package - Make scraping Prometheus instance optional Signed-off-by: Matej Gera * Bump wait time for minio ready Signed-off-by: Matej Gera * Address feedback Signed-off-by: Matej Gera * Adjust DefaultPromConfig signature; use constant instead Signed-off-by: Matej Gera --- test/e2e/e2ethanos/services.go | 64 ++++++++++++++++++ test/e2e/exemplars_api_test.go | 4 +- test/e2e/info_api_test.go | 6 +- test/e2e/metadata_api_test.go | 4 +- test/e2e/query_frontend_test.go | 20 +++--- test/e2e/query_test.go | 116 +++++++++----------------------- test/e2e/receive_test.go | 44 ++++++------ test/e2e/rules_api_test.go | 4 +- test/e2e/targets_api_test.go | 4 +- 9 files changed, 139 insertions(+), 127 deletions(-) diff --git a/test/e2e/e2ethanos/services.go b/test/e2e/e2ethanos/services.go index 75c5bb16cd2..7a63d23ef14 100644 --- a/test/e2e/e2ethanos/services.go +++ 
b/test/e2e/e2ethanos/services.go @@ -1037,3 +1037,67 @@ func NewS3Config(bucket, endpoint, basePath string) s3.Config { }, } } + +// NOTE: by using aggregation all results are now unsorted. +var QueryUpWithoutInstance = func() string { return "sum(up) without (instance)" } + +// LocalPrometheusTarget is a constant to be used in the Prometheus config if you +// wish to enable Prometheus to scrape itself in a test. +const LocalPrometheusTarget = "localhost:9090" + +// DefaultPromConfig returns Prometheus config that sets Prometheus to: +// * expose 2 external labels, source and replica. +// * optionallly scrape self. This will produce up == 0 metric which we can assert on. +// * optionally remote write endpoint to write into. +func DefaultPromConfig(name string, replica int, remoteWriteEndpoint, ruleFile string, scrapeTargets ...string) string { + var targets string + if len(scrapeTargets) > 0 { + targets = strings.Join(scrapeTargets, ",") + } + + config := fmt.Sprintf(` +global: + external_labels: + prometheus: %v + replica: %v +`, name, replica) + + if targets != "" { + config = fmt.Sprintf(` +%s +scrape_configs: +- job_name: 'myself' + # Quick scrapes for test purposes. + scrape_interval: 1s + scrape_timeout: 1s + static_configs: + - targets: [%s] + relabel_configs: + - source_labels: ['__address__'] + regex: '^.+:80$' + action: drop +`, config, targets) + } + + if remoteWriteEndpoint != "" { + config = fmt.Sprintf(` +%s +remote_write: +- url: "%s" + # Don't spam receiver on mistake. 
+ queue_config: + min_backoff: 2s + max_backoff: 10s +`, config, remoteWriteEndpoint) + } + + if ruleFile != "" { + config = fmt.Sprintf(` +%s +rule_files: +- "%s" +`, config, ruleFile) + } + + return config +} diff --git a/test/e2e/exemplars_api_test.go b/test/e2e/exemplars_api_test.go index 5bc2d1532de..ad2bc049d70 100644 --- a/test/e2e/exemplars_api_test.go +++ b/test/e2e/exemplars_api_test.go @@ -42,7 +42,7 @@ func TestExemplarsAPI_Fanout(t *testing.T) { prom1, sidecar1, err = e2ethanos.NewPrometheusWithSidecar( e, "prom1", - defaultPromConfig("ha", 0, "", "", "localhost:9090", qUnitiated.Future().InternalEndpoint("http")), + e2ethanos.DefaultPromConfig("ha", 0, "", "", "localhost:9090", qUnitiated.Future().InternalEndpoint("http"), e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage(), "", @@ -52,7 +52,7 @@ func TestExemplarsAPI_Fanout(t *testing.T) { prom2, sidecar2, err = e2ethanos.NewPrometheusWithSidecar( e, "prom2", - defaultPromConfig("ha", 1, "", "", "localhost:9090", qUnitiated.Future().InternalEndpoint("http")), + e2ethanos.DefaultPromConfig("ha", 1, "", "", "localhost:9090", qUnitiated.Future().InternalEndpoint("http"), e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage(), "", diff --git a/test/e2e/info_api_test.go b/test/e2e/info_api_test.go index 3f759a35ab5..6122d9ea6b1 100644 --- a/test/e2e/info_api_test.go +++ b/test/e2e/info_api_test.go @@ -30,11 +30,11 @@ func TestInfo(t *testing.T) { testutil.Ok(t, err) t.Cleanup(e2ethanos.CleanScenario(t, e)) - prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "alone1", defaultPromConfig("prom-alone1", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "") + prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "alone1", e2ethanos.DefaultPromConfig("prom-alone1", 0, "", "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) - prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar(e, "alone2", 
defaultPromConfig("prom-alone2", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "") + prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar(e, "alone2", e2ethanos.DefaultPromConfig("prom-alone2", 0, "", "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) - prom3, sidecar3, err := e2ethanos.NewPrometheusWithSidecar(e, "alone3", defaultPromConfig("prom-alone3", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "") + prom3, sidecar3, err := e2ethanos.NewPrometheusWithSidecar(e, "alone3", e2ethanos.DefaultPromConfig("prom-alone3", 0, "", "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1, sidecar1, prom2, sidecar2, prom3, sidecar3)) diff --git a/test/e2e/metadata_api_test.go b/test/e2e/metadata_api_test.go index 1d4c9e4b502..00a87531b42 100644 --- a/test/e2e/metadata_api_test.go +++ b/test/e2e/metadata_api_test.go @@ -30,7 +30,7 @@ func TestMetadataAPI_Fanout(t *testing.T) { prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar( e, "prom1", - defaultPromConfig("ha", 0, "", "", "localhost:9090", "sidecar-prom1:8080"), + e2ethanos.DefaultPromConfig("ha", 0, "", "", e2ethanos.LocalPrometheusTarget, "sidecar-prom1:8080"), "", e2ethanos.DefaultPrometheusImage(), "", ) @@ -39,7 +39,7 @@ func TestMetadataAPI_Fanout(t *testing.T) { prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar( e, "prom2", - defaultPromConfig("ha", 1, "", "", "localhost:9090", "sidecar-prom2:8080"), + e2ethanos.DefaultPromConfig("ha", 1, "", "", e2ethanos.LocalPrometheusTarget, "sidecar-prom2:8080"), "", e2ethanos.DefaultPrometheusImage(), "", ) diff --git a/test/e2e/query_frontend_test.go b/test/e2e/query_frontend_test.go index d9647551a81..c3a8d720ef8 100644 --- a/test/e2e/query_frontend_test.go +++ b/test/e2e/query_frontend_test.go @@ -32,7 +32,7 @@ func TestQueryFrontend(t *testing.T) { now := time.Now() - prom, sidecar, err := 
e2ethanos.NewPrometheusWithSidecar(e, "1", defaultPromConfig("test", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "") + prom, sidecar, err := e2ethanos.NewPrometheusWithSidecar(e, "1", e2ethanos.DefaultPromConfig("test", 0, "", "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom, sidecar)) @@ -59,7 +59,7 @@ func TestQueryFrontend(t *testing.T) { // Ensure we can get the result from Querier first so that it // doesn't need to retry when we send queries to the frontend later. - queryAndAssertSeries(t, ctx, q.Endpoint("http"), queryUpWithoutInstance, time.Now, promclient.QueryOptions{ + queryAndAssertSeries(t, ctx, q.Endpoint("http"), e2ethanos.QueryUpWithoutInstance, time.Now, promclient.QueryOptions{ Deduplicate: false, }, []model.Metric{ { @@ -77,7 +77,7 @@ func TestQueryFrontend(t *testing.T) { queryTimes := vals[0] t.Run("query frontend works for instant query", func(t *testing.T) { - queryAndAssertSeries(t, ctx, queryFrontend.Endpoint("http"), queryUpWithoutInstance, time.Now, promclient.QueryOptions{ + queryAndAssertSeries(t, ctx, queryFrontend.Endpoint("http"), e2ethanos.QueryUpWithoutInstance, time.Now, promclient.QueryOptions{ Deduplicate: false, }, []model.Metric{ { @@ -105,7 +105,7 @@ func TestQueryFrontend(t *testing.T) { t, ctx, queryFrontend.Endpoint("http"), - queryUpWithoutInstance, + e2ethanos.QueryUpWithoutInstance, timestamp.FromTime(now.Add(-time.Hour)), timestamp.FromTime(now.Add(time.Hour)), 14, @@ -147,7 +147,7 @@ func TestQueryFrontend(t *testing.T) { t, ctx, queryFrontend.Endpoint("http"), - queryUpWithoutInstance, + e2ethanos.QueryUpWithoutInstance, timestamp.FromTime(now.Add(-time.Hour)), timestamp.FromTime(now.Add(time.Hour)), 14, @@ -192,7 +192,7 @@ func TestQueryFrontend(t *testing.T) { t, ctx, queryFrontend.Endpoint("http"), - queryUpWithoutInstance, + e2ethanos.QueryUpWithoutInstance, timestamp.FromTime(now.Add(-time.Hour)), 
timestamp.FromTime(now.Add(24*time.Hour)), 14, @@ -396,7 +396,7 @@ func TestQueryFrontendMemcachedCache(t *testing.T) { now := time.Now() - prom, sidecar, err := e2ethanos.NewPrometheusWithSidecar(e, "1", defaultPromConfig("test", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "") + prom, sidecar, err := e2ethanos.NewPrometheusWithSidecar(e, "1", e2ethanos.DefaultPromConfig("test", 0, "", "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom, sidecar)) @@ -436,7 +436,7 @@ func TestQueryFrontendMemcachedCache(t *testing.T) { // Ensure we can get the result from Querier first so that it // doesn't need to retry when we send queries to the frontend later. - queryAndAssertSeries(t, ctx, q.Endpoint("http"), queryUpWithoutInstance, time.Now, promclient.QueryOptions{ + queryAndAssertSeries(t, ctx, q.Endpoint("http"), e2ethanos.QueryUpWithoutInstance, time.Now, promclient.QueryOptions{ Deduplicate: false, }, []model.Metric{ { @@ -455,7 +455,7 @@ func TestQueryFrontendMemcachedCache(t *testing.T) { t, ctx, queryFrontend.Endpoint("http"), - queryUpWithoutInstance, + e2ethanos.QueryUpWithoutInstance, timestamp.FromTime(now.Add(-time.Hour)), timestamp.FromTime(now.Add(time.Hour)), 14, @@ -485,7 +485,7 @@ func TestQueryFrontendMemcachedCache(t *testing.T) { t, ctx, queryFrontend.Endpoint("http"), - queryUpWithoutInstance, + e2ethanos.QueryUpWithoutInstance, timestamp.FromTime(now.Add(-time.Hour)), timestamp.FromTime(now.Add(time.Hour)), 14, diff --git a/test/e2e/query_test.go b/test/e2e/query_test.go index e34ec52fb4f..280b2cd9d13 100644 --- a/test/e2e/query_test.go +++ b/test/e2e/query_test.go @@ -50,59 +50,6 @@ import ( "github.com/thanos-io/thanos/test/e2e/e2ethanos" ) -// NOTE: by using aggregation all results are now unsorted. 
-var queryUpWithoutInstance = func() string { return "sum(up) without (instance)" } - -// defaultPromConfig returns Prometheus config that sets Prometheus to: -// * expose 2 external labels, source and replica. -// * scrape fake target. This will produce up == 0 metric which we can assert on. -// * optionally remote write endpoint to write into. -func defaultPromConfig(name string, replica int, remoteWriteEndpoint, ruleFile string, scrapeTargets ...string) string { - targets := "localhost:9090" - if len(scrapeTargets) > 0 { - targets = strings.Join(scrapeTargets, ",") - } - config := fmt.Sprintf(` -global: - external_labels: - prometheus: %v - replica: %v -scrape_configs: -- job_name: 'myself' - # Quick scrapes for test purposes. - scrape_interval: 1s - scrape_timeout: 1s - static_configs: - - targets: [%s] - relabel_configs: - - source_labels: ['__address__'] - regex: '^.+:80$' - action: drop -`, name, replica, targets) - - if remoteWriteEndpoint != "" { - config = fmt.Sprintf(` -%s -remote_write: -- url: "%s" - # Don't spam receiver on mistake. 
- queue_config: - min_backoff: 2s - max_backoff: 10s -`, config, remoteWriteEndpoint) - } - - if ruleFile != "" { - config = fmt.Sprintf(` -%s -rule_files: -- "%s" -`, config, ruleFile) - } - - return config -} - func defaultWebConfig() string { // username: test, secret: test(bcrypt hash) return ` @@ -124,7 +71,7 @@ func TestSidecarNotReady(t *testing.T) { testutil.Ok(t, err) t.Cleanup(e2ethanos.CleanScenario(t, e)) - prom, sidecar, err := e2ethanos.NewPrometheusWithSidecar(e, "alone", defaultPromConfig("prom-alone", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "") + prom, sidecar, err := e2ethanos.NewPrometheusWithSidecar(e, "alone", e2ethanos.DefaultPromConfig("prom-alone", 0, "", "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom, sidecar)) testutil.Ok(t, prom.Stop()) @@ -163,13 +110,13 @@ func TestQuery(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(receiverRunnable)) - prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "alone", defaultPromConfig("prom-alone", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "") + prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "alone", e2ethanos.DefaultPromConfig("prom-alone", 0, "", "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) - prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar(e, "remote-and-sidecar", defaultPromConfig("prom-both-remote-write-and-sidecar", 1234, e2ethanos.RemoteWriteEndpoint(receiver.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage(), "") + prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar(e, "remote-and-sidecar", e2ethanos.DefaultPromConfig("prom-both-remote-write-and-sidecar", 1234, e2ethanos.RemoteWriteEndpoint(receiver.Future().InternalEndpoint("remote-write")), "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage(), "") 
testutil.Ok(t, err) - prom3, sidecar3, err := e2ethanos.NewPrometheusWithSidecar(e, "ha1", defaultPromConfig("prom-ha", 0, "", filepath.Join(e2ethanos.ContainerSharedDir, "", "*.yaml")), "", e2ethanos.DefaultPrometheusImage(), "") + prom3, sidecar3, err := e2ethanos.NewPrometheusWithSidecar(e, "ha1", e2ethanos.DefaultPromConfig("prom-ha", 0, "", filepath.Join(e2ethanos.ContainerSharedDir, "", "*.yaml"), e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) - prom4, sidecar4, err := e2ethanos.NewPrometheusWithSidecar(e, "ha2", defaultPromConfig("prom-ha", 1, "", filepath.Join(e2ethanos.ContainerSharedDir, "", "*.yaml")), "", e2ethanos.DefaultPrometheusImage(), "") + prom4, sidecar4, err := e2ethanos.NewPrometheusWithSidecar(e, "ha2", e2ethanos.DefaultPromConfig("prom-ha", 1, "", filepath.Join(e2ethanos.ContainerSharedDir, "", "*.yaml"), e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1, sidecar1, prom2, sidecar2, prom3, sidecar3, prom4, sidecar4)) @@ -184,7 +131,7 @@ func TestQuery(t *testing.T) { testutil.Ok(t, q.WaitSumMetricsWithOptions(e2e.Equals(5), []string{"thanos_store_nodes_grpc_connections"}, e2e.WaitMissingMetrics())) - queryAndAssertSeries(t, ctx, q.Endpoint("http"), queryUpWithoutInstance, time.Now, promclient.QueryOptions{ + queryAndAssertSeries(t, ctx, q.Endpoint("http"), e2ethanos.QueryUpWithoutInstance, time.Now, promclient.QueryOptions{ Deduplicate: false, }, []model.Metric{ { @@ -217,7 +164,7 @@ func TestQuery(t *testing.T) { }) // With deduplication. 
- queryAndAssertSeries(t, ctx, q.Endpoint("http"), queryUpWithoutInstance, time.Now, promclient.QueryOptions{ + queryAndAssertSeries(t, ctx, q.Endpoint("http"), e2ethanos.QueryUpWithoutInstance, time.Now, promclient.QueryOptions{ Deduplicate: true, }, []model.Metric{ { @@ -317,9 +264,9 @@ func TestQueryLabelNames(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(receiverRunnable)) - prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "alone", defaultPromConfig("prom-alone", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "") + prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "alone", e2ethanos.DefaultPromConfig("prom-alone", 0, "", "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) - prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar(e, "remote-and-sidecar", defaultPromConfig("prom-both-remote-write-and-sidecar", 1234, e2ethanos.RemoteWriteEndpoint(receiver.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage(), "") + prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar(e, "remote-and-sidecar", e2ethanos.DefaultPromConfig("prom-both-remote-write-and-sidecar", 1234, e2ethanos.RemoteWriteEndpoint(receiver.Future().InternalEndpoint("remote-write")), "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1, sidecar1, prom2, sidecar2)) @@ -369,9 +316,9 @@ func TestQueryLabelValues(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(receiverRunnable)) - prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "alone", defaultPromConfig("prom-alone", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "") + prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "alone", e2ethanos.DefaultPromConfig("prom-alone", 0, "", "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage(), "") 
testutil.Ok(t, err) - prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar(e, "remote-and-sidecar", defaultPromConfig("prom-both-remote-write-and-sidecar", 1234, e2ethanos.RemoteWriteEndpoint(receiver.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage(), "") + prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar(e, "remote-and-sidecar", e2ethanos.DefaultPromConfig("prom-both-remote-write-and-sidecar", 1234, e2ethanos.RemoteWriteEndpoint(receiver.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1, sidecar1, prom2, sidecar2)) @@ -412,7 +359,7 @@ func TestQueryWithAuthorizedSidecar(t *testing.T) { testutil.Ok(t, err) t.Cleanup(e2ethanos.CleanScenario(t, e)) - prom, sidecar, err := e2ethanos.NewPrometheusWithSidecar(e, "alone", defaultPromConfig("prom-alone", 0, "", ""), defaultWebConfig(), e2ethanos.DefaultPrometheusImage(), "") + prom, sidecar, err := e2ethanos.NewPrometheusWithSidecar(e, "alone", e2ethanos.DefaultPromConfig("prom-alone", 0, "", "", e2ethanos.LocalPrometheusTarget), defaultWebConfig(), e2ethanos.DefaultPrometheusImage(), "") testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom, sidecar)) @@ -425,7 +372,7 @@ func TestQueryWithAuthorizedSidecar(t *testing.T) { testutil.Ok(t, q.WaitSumMetricsWithOptions(e2e.Equals(1), []string{"thanos_store_nodes_grpc_connections"}, e2e.WaitMissingMetrics())) - queryAndAssertSeries(t, ctx, q.Endpoint("http"), queryUpWithoutInstance, time.Now, promclient.QueryOptions{ + queryAndAssertSeries(t, ctx, q.Endpoint("http"), e2ethanos.QueryUpWithoutInstance, time.Now, promclient.QueryOptions{ Deduplicate: false, }, []model.Metric{ { @@ -470,7 +417,7 @@ func TestQueryCompatibilityWithPreInfoAPI(t *testing.T) { p1, s1, err := e2ethanos.NewPrometheusWithSidecarCustomImage( e, "p1", - defaultPromConfig("p1", 0, "", filepath.Join(e2ethanos.ContainerSharedDir, 
promRulesSubDir, "*.yaml"), "localhost:9090", qUninit.Future().InternalEndpoint("http")), + e2ethanos.DefaultPromConfig("p1", 0, "", filepath.Join(e2ethanos.ContainerSharedDir, promRulesSubDir, "*.yaml"), e2ethanos.LocalPrometheusTarget, qUninit.Future().InternalEndpoint("http")), "", e2ethanos.DefaultPrometheusImage(), "", @@ -502,7 +449,7 @@ config: // We should have single TCP connection, since all APIs are against the same server. testutil.Ok(t, q.WaitSumMetricsWithOptions(e2e.Equals(1), []string{"thanos_store_nodes_grpc_connections"}, e2e.WaitMissingMetrics())) - queryAndAssertSeries(t, ctx, q.Endpoint("http"), queryUpWithoutInstance, time.Now, promclient.QueryOptions{ + queryAndAssertSeries(t, ctx, q.Endpoint("http"), e2ethanos.QueryUpWithoutInstance, time.Now, promclient.QueryOptions{ Deduplicate: false, }, []model.Metric{ { @@ -652,7 +599,7 @@ func TestSidecarStorePushdown(t *testing.T) { testutil.Ok(t, err) t.Cleanup(e2ethanos.CleanScenario(t, e)) - prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "p1", defaultPromConfig("p1", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "", "remote-write-receiver") + prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "p1", e2ethanos.DefaultPromConfig("p1", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "", "remote-write-receiver") testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1, sidecar1)) @@ -701,8 +648,9 @@ func TestSidecarStorePushdown(t *testing.T) { testutil.Ok(t, synthesizeSamples(ctx, prom1, []fakeMetricSample{ { - label: "foo", - value: 123, + label: "foo", + value: 123, + timestampUnixNano: now.UnixNano(), }, })) @@ -869,11 +817,11 @@ func TestSidecarQueryEvaluation(t *testing.T) { testutil.Ok(t, err) t.Cleanup(e2ethanos.CleanScenario(t, e)) - prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "p1", defaultPromConfig("p1", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "", "remote-write-receiver") + prom1, sidecar1, err := 
e2ethanos.NewPrometheusWithSidecar(e, "p1", e2ethanos.DefaultPromConfig("p1", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "", "remote-write-receiver") testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1, sidecar1)) - prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar(e, "p2", defaultPromConfig("p2", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "", "remote-write-receiver") + prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar(e, "p2", e2ethanos.DefaultPromConfig("p2", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "", "remote-write-receiver") testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom2, sidecar2)) @@ -1239,11 +1187,11 @@ func TestSidecarQueryEvaluationWithDedup(t *testing.T) { testutil.Ok(t, err) t.Cleanup(e2ethanos.CleanScenario(t, e)) - prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "p1", defaultPromConfig("p1", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "", "remote-write-receiver") + prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "p1", e2ethanos.DefaultPromConfig("p1", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), "", "remote-write-receiver") testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1, sidecar1)) - prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar(e, "p2", defaultPromConfig("p1", 1, "", ""), "", e2ethanos.DefaultPrometheusImage(), "", "remote-write-receiver") + prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar(e, "p2", e2ethanos.DefaultPromConfig("p1", 1, "", ""), "", e2ethanos.DefaultPrometheusImage(), "", "remote-write-receiver") testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom2, sidecar2)) @@ -1283,7 +1231,7 @@ func TestSidecarAlignmentPushdown(t *testing.T) { now := time.Now() - prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "p1", defaultPromConfig("p1", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), now.Add(time.Duration(-1)*time.Hour).Format(time.RFC3339), 
now.Format(time.RFC3339), "remote-write-receiver") + prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar(e, "p1", e2ethanos.DefaultPromConfig("p1", 0, "", ""), "", e2ethanos.DefaultPrometheusImage(), now.Add(time.Duration(-1)*time.Hour).Format(time.RFC3339), now.Format(time.RFC3339), "remote-write-receiver") testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1, sidecar1)) @@ -1305,17 +1253,17 @@ func TestSidecarAlignmentPushdown(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute) t.Cleanup(cancel) + samples := make([]fakeMetricSample, 0) for i := now.Add(time.Duration(-3) * time.Hour); i.Before(now); i = i.Add(30 * time.Second) { - testutil.Ok(t, synthesizeSamples(ctx, prom1, []fakeMetricSample{ - { - label: "test", - value: 1, - timestampUnixNano: i.UnixNano(), - }, - })) - + samples = append(samples, fakeMetricSample{ + label: "test", + value: 1, + timestampUnixNano: i.UnixNano(), + }) } + testutil.Ok(t, synthesizeSamples(ctx, prom1, samples)) + // This query should have identical requests. 
testQuery := func() string { return `max_over_time({instance="test"}[5m])` } diff --git a/test/e2e/receive_test.go b/test/e2e/receive_test.go index fc44ac10d04..8f57e35ff8e 100644 --- a/test/e2e/receive_test.go +++ b/test/e2e/receive_test.go @@ -65,7 +65,7 @@ func TestReceive(t *testing.T) { testutil.Ok(t, e2e.StartAndWaitReady(i)) // Setup Prometheus - prom, _, err := e2ethanos.NewPrometheus(e, "1", defaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(i.InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom, _, err := e2ethanos.NewPrometheus(e, "1", e2ethanos.DefaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(i.InternalEndpoint("remote-write")), "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom)) @@ -79,7 +79,7 @@ func TestReceive(t *testing.T) { testutil.Ok(t, q.WaitSumMetricsWithOptions(e2e.Equals(1), []string{"thanos_store_nodes_grpc_connections"}, e2e.WaitMissingMetrics())) // We expect the data from each Prometheus instance to be replicated twice across our ingesting instances - queryAndAssertSeries(t, ctx, q.Endpoint("http"), queryUpWithoutInstance, time.Now, promclient.QueryOptions{ + queryAndAssertSeries(t, ctx, q.Endpoint("http"), e2ethanos.QueryUpWithoutInstance, time.Now, promclient.QueryOptions{ Deduplicate: false, }, []model.Metric{ { @@ -148,11 +148,11 @@ func TestReceive(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(i1, i2, i3, r1)) - prom1, _, err := e2ethanos.NewPrometheus(e, "1", defaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom1, _, err := e2ethanos.NewPrometheus(e, "1", e2ethanos.DefaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.InternalEndpoint("remote-write")), "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) - prom2, _, 
err := e2ethanos.NewPrometheus(e, "2", defaultPromConfig("prom2", 0, e2ethanos.RemoteWriteEndpoint(r1.InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom2, _, err := e2ethanos.NewPrometheus(e, "2", e2ethanos.DefaultPromConfig("prom2", 0, e2ethanos.RemoteWriteEndpoint(r1.InternalEndpoint("remote-write")), "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) - prom3, _, err := e2ethanos.NewPrometheus(e, "3", defaultPromConfig("prom3", 0, e2ethanos.RemoteWriteEndpoint(r1.InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom3, _, err := e2ethanos.NewPrometheus(e, "3", e2ethanos.DefaultPromConfig("prom3", 0, e2ethanos.RemoteWriteEndpoint(r1.InternalEndpoint("remote-write")), "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1, prom2, prom3)) @@ -262,9 +262,9 @@ func TestReceive(t *testing.T) { testutil.Ok(t, e2e.StartAndWaitReady(i1, i2, i3, r1, r2)) //Setup Prometheuses - prom1, _, err := e2ethanos.NewPrometheus(e, "1", defaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom1, _, err := e2ethanos.NewPrometheus(e, "1", e2ethanos.DefaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.InternalEndpoint("remote-write")), "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) - prom2, _, err := e2ethanos.NewPrometheus(e, "2", defaultPromConfig("prom2", 0, e2ethanos.RemoteWriteEndpoint(r1.InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom2, _, err := e2ethanos.NewPrometheus(e, "2", e2ethanos.DefaultPromConfig("prom2", 0, e2ethanos.RemoteWriteEndpoint(r1.InternalEndpoint("remote-write")), "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) testutil.Ok(t, 
e2e.StartAndWaitReady(prom1, prom2)) @@ -361,11 +361,11 @@ func TestReceive(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(r1Runnable, r2Runnable, r3Runnable)) - prom1, _, err := e2ethanos.NewPrometheus(e, "1", defaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom1, _, err := e2ethanos.NewPrometheus(e, "1", e2ethanos.DefaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.Future().InternalEndpoint("remote-write")), "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) - prom2, _, err := e2ethanos.NewPrometheus(e, "2", defaultPromConfig("prom2", 0, e2ethanos.RemoteWriteEndpoint(r2.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom2, _, err := e2ethanos.NewPrometheus(e, "2", e2ethanos.DefaultPromConfig("prom2", 0, e2ethanos.RemoteWriteEndpoint(r2.Future().InternalEndpoint("remote-write")), "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) - prom3, _, err := e2ethanos.NewPrometheus(e, "3", defaultPromConfig("prom3", 0, e2ethanos.RemoteWriteEndpoint(r3.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom3, _, err := e2ethanos.NewPrometheus(e, "3", e2ethanos.DefaultPromConfig("prom3", 0, e2ethanos.RemoteWriteEndpoint(r3.Future().InternalEndpoint("remote-write")), "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1, prom2, prom3)) @@ -378,7 +378,7 @@ func TestReceive(t *testing.T) { testutil.Ok(t, q.WaitSumMetricsWithOptions(e2e.Equals(3), []string{"thanos_store_nodes_grpc_connections"}, e2e.WaitMissingMetrics())) - queryAndAssertSeries(t, ctx, q.Endpoint("http"), queryUpWithoutInstance, time.Now, promclient.QueryOptions{ + queryAndAssertSeries(t, ctx, q.Endpoint("http"), 
e2ethanos.QueryUpWithoutInstance, time.Now, promclient.QueryOptions{ Deduplicate: false, }, []model.Metric{ { @@ -434,11 +434,11 @@ func TestReceive(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(r1Runnable, r2Runnable, r3Runnable)) - prom1, _, err := e2ethanos.NewPrometheus(e, "1", defaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom1, _, err := e2ethanos.NewPrometheus(e, "1", e2ethanos.DefaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.Future().InternalEndpoint("remote-write")), "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) - prom2, _, err := e2ethanos.NewPrometheus(e, "2", defaultPromConfig("prom2", 0, e2ethanos.RemoteWriteEndpoint(r2.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom2, _, err := e2ethanos.NewPrometheus(e, "2", e2ethanos.DefaultPromConfig("prom2", 0, e2ethanos.RemoteWriteEndpoint(r2.Future().InternalEndpoint("remote-write")), "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) - prom3, _, err := e2ethanos.NewPrometheus(e, "3", defaultPromConfig("prom3", 0, e2ethanos.RemoteWriteEndpoint(r3.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom3, _, err := e2ethanos.NewPrometheus(e, "3", e2ethanos.DefaultPromConfig("prom3", 0, e2ethanos.RemoteWriteEndpoint(r3.Future().InternalEndpoint("remote-write")), "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1, prom2, prom3)) @@ -451,7 +451,7 @@ func TestReceive(t *testing.T) { testutil.Ok(t, q.WaitSumMetricsWithOptions(e2e.Equals(3), []string{"thanos_store_nodes_grpc_connections"}, e2e.WaitMissingMetrics())) - queryAndAssertSeries(t, ctx, q.Endpoint("http"), queryUpWithoutInstance, time.Now, 
promclient.QueryOptions{ + queryAndAssertSeries(t, ctx, q.Endpoint("http"), e2ethanos.QueryUpWithoutInstance, time.Now, promclient.QueryOptions{ Deduplicate: false, }, []model.Metric{ { @@ -511,7 +511,7 @@ func TestReceive(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(r1Runnable, r2Runnable, r3Runnable)) - prom1, _, err := e2ethanos.NewPrometheus(e, "1", defaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom1, _, err := e2ethanos.NewPrometheus(e, "1", e2ethanos.DefaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.Future().InternalEndpoint("remote-write")), "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1)) @@ -524,7 +524,7 @@ func TestReceive(t *testing.T) { testutil.Ok(t, q.WaitSumMetricsWithOptions(e2e.Equals(3), []string{"thanos_store_nodes_grpc_connections"}, e2e.WaitMissingMetrics())) - queryAndAssertSeries(t, ctx, q.Endpoint("http"), queryUpWithoutInstance, time.Now, promclient.QueryOptions{ + queryAndAssertSeries(t, ctx, q.Endpoint("http"), e2ethanos.QueryUpWithoutInstance, time.Now, promclient.QueryOptions{ Deduplicate: false, }, []model.Metric{ { @@ -581,7 +581,7 @@ func TestReceive(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(r1Runnable, r2Runnable)) - prom1, _, err := e2ethanos.NewPrometheus(e, "1", defaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.Future().InternalEndpoint("remote-write")), ""), "", e2ethanos.DefaultPrometheusImage()) + prom1, _, err := e2ethanos.NewPrometheus(e, "1", e2ethanos.DefaultPromConfig("prom1", 0, e2ethanos.RemoteWriteEndpoint(r1.Future().InternalEndpoint("remote-write")), "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1)) @@ -594,7 +594,7 @@ func TestReceive(t 
*testing.T) { testutil.Ok(t, q.WaitSumMetricsWithOptions(e2e.Equals(2), []string{"thanos_store_nodes_grpc_connections"}, e2e.WaitMissingMetrics())) - queryAndAssertSeries(t, ctx, q.Endpoint("http"), queryUpWithoutInstance, time.Now, promclient.QueryOptions{ + queryAndAssertSeries(t, ctx, q.Endpoint("http"), e2ethanos.QueryUpWithoutInstance, time.Now, promclient.QueryOptions{ Deduplicate: false, }, []model.Metric{ { @@ -640,9 +640,9 @@ func TestReceive(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(rp1, rp2)) - prom1, _, err := e2ethanos.NewPrometheus(e, "1", defaultPromConfig("prom1", 0, "http://"+rp1.InternalEndpoint("http")+"/api/v1/receive", ""), "", e2ethanos.DefaultPrometheusImage()) + prom1, _, err := e2ethanos.NewPrometheus(e, "1", e2ethanos.DefaultPromConfig("prom1", 0, "http://"+rp1.InternalEndpoint("http")+"/api/v1/receive", "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) - prom2, _, err := e2ethanos.NewPrometheus(e, "2", defaultPromConfig("prom2", 0, "http://"+rp2.InternalEndpoint("http")+"/api/v1/receive", ""), "", e2ethanos.DefaultPrometheusImage()) + prom2, _, err := e2ethanos.NewPrometheus(e, "2", e2ethanos.DefaultPromConfig("prom2", 0, "http://"+rp2.InternalEndpoint("http")+"/api/v1/receive", "", e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage()) testutil.Ok(t, err) testutil.Ok(t, e2e.StartAndWaitReady(prom1, prom2)) @@ -653,7 +653,7 @@ func TestReceive(t *testing.T) { t.Cleanup(cancel) testutil.Ok(t, q.WaitSumMetricsWithOptions(e2e.Equals(1), []string{"thanos_store_nodes_grpc_connections"}, e2e.WaitMissingMetrics())) - queryAndAssertSeries(t, ctx, q.Endpoint("http"), queryUpWithoutInstance, time.Now, promclient.QueryOptions{ + queryAndAssertSeries(t, ctx, q.Endpoint("http"), e2ethanos.QueryUpWithoutInstance, time.Now, promclient.QueryOptions{ Deduplicate: false, }, []model.Metric{ { diff --git a/test/e2e/rules_api_test.go b/test/e2e/rules_api_test.go 
index 31a530fe472..624837cfc3f 100644 --- a/test/e2e/rules_api_test.go +++ b/test/e2e/rules_api_test.go @@ -50,7 +50,7 @@ func TestRulesAPI_Fanout(t *testing.T) { prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar( e, "prom1", - defaultPromConfig("ha", 0, "", filepath.Join(e2ethanos.ContainerSharedDir, promRulesSubDir, "*.yaml")), + e2ethanos.DefaultPromConfig("ha", 0, "", filepath.Join(e2ethanos.ContainerSharedDir, promRulesSubDir, "*.yaml"), e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage(), "", ) @@ -58,7 +58,7 @@ func TestRulesAPI_Fanout(t *testing.T) { prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar( e, "prom2", - defaultPromConfig("ha", 1, "", filepath.Join(e2ethanos.ContainerSharedDir, promRulesSubDir, "*.yaml")), + e2ethanos.DefaultPromConfig("ha", 1, "", filepath.Join(e2ethanos.ContainerSharedDir, promRulesSubDir, "*.yaml"), e2ethanos.LocalPrometheusTarget), "", e2ethanos.DefaultPrometheusImage(), "", ) diff --git a/test/e2e/targets_api_test.go b/test/e2e/targets_api_test.go index 9ff72509900..c8d59019653 100644 --- a/test/e2e/targets_api_test.go +++ b/test/e2e/targets_api_test.go @@ -35,7 +35,7 @@ func TestTargetsAPI_Fanout(t *testing.T) { prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar( e, "prom1", - defaultPromConfig("ha", 0, "", "", "localhost:9090", "localhost:80"), + e2ethanos.DefaultPromConfig("ha", 0, "", "", e2ethanos.LocalPrometheusTarget, "localhost:80"), "", e2ethanos.DefaultPrometheusImage(), "", ) @@ -43,7 +43,7 @@ func TestTargetsAPI_Fanout(t *testing.T) { prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar( e, "prom2", - defaultPromConfig("ha", 1, "", "", "localhost:9090", "localhost:80"), + e2ethanos.DefaultPromConfig("ha", 1, "", "", e2ethanos.LocalPrometheusTarget, "localhost:80"), "", e2ethanos.DefaultPrometheusImage(), "", ) From 41977e0eeff673ff64af5d464dbf24ed2227d173 Mon Sep 17 00:00:00 2001 From: fpetkovski Date: Wed, 16 Mar 2022 11:26:17 +0100 Subject: [PATCH 18/24] 
makefile: Fix Makefile support for arm64 Currently the Makefile does not properly detect arm64 architecture through uname -m. It assumes that all arm64 CPUs will be detected as armv8. This improves the detection by handling arm64 as a separate case. It also formats the Makefile to use tabs instead of spaces in order to prevent failures on OS X. Signed-off-by: fpetkovski --- Makefile | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 86e5bbb57d6..64854d22e32 100644 --- a/Makefile +++ b/Makefile @@ -17,13 +17,16 @@ arch = $(shell uname -m) # The include .busybox-versions includes the SHA's of all the platforms, which can be used as var. ifeq ($(arch), x86_64) - # amd64 - BASE_DOCKER_SHA=${amd64} + # amd64 + BASE_DOCKER_SHA=${amd64} else ifeq ($(arch), armv8) - # arm64 - BASE_DOCKER_SHA=${arm64} + # arm64 + BASE_DOCKER_SHA=${arm64} +else ifeq ($(arch), arm64) + # arm64 + BASE_DOCKER_SHA=${arm64} else - echo >&2 "only support amd64 or arm64 arch" && exit 1 + echo >&2 "only support amd64 or arm64 arch" && exit 1 endif DOCKER_ARCHS ?= amd64 arm64 # Generate two target: docker-xxx-amd64, docker-xxx-arm64. 
From 72d63e365a3ab53090a1bae1e03237a15d72f814 Mon Sep 17 00:00:00 2001 From: Matej Gera <38492574+matej-g@users.noreply.github.com> Date: Thu, 17 Mar 2022 21:47:57 +0100 Subject: [PATCH 19/24] Tracing: Migrate Google Cloud (Stackdriver) client to OpenTelemetry (#4838) * Add migration utilities - Add method to create bridge tracer - Implement a sampler which enables us to force tracing Signed-off-by: Matej Gera * Migrate and rename Stackdriver to Google Cloud - Rename to Google Cloud, keep it backwards compatible - Refactor and move to OTEL exporter - Adjust factory to use bridge tracer for this provider Signed-off-by: Matej Gera * Adjust HTTP middleware - to ensure force tracing / populate trace ID header works for the bridge tracer as well Signed-off-by: Matej Gera * Update documentation Signed-off-by: Matej Gera * Copyright file headers Signed-off-by: Matej Gera * Fix doc formatting Signed-off-by: Matej Gera * Upgrade OTEL to 1.3.0 Signed-off-by: Matej Gera * Wrap bridge tracer to make propagation work - Currently, bridge tracer supports only HTTP headers as a carrier. However, our instrumentation e.g. for gRPC uses metadata.MD as a carrier instead, breaking the propagation. This fix works around it by 'converting' the carrier to HTTP header. See code docs for details.
Signed-off-by: Matej Gera * Remove leftover go.mod replace Signed-off-by: Matej Gera * Update version; fix constants naming Signed-off-by: Matej Gera --- docs/tracing.md | 10 +- examples/interactive/interactive_test.go | 2 +- go.mod | 12 +- go.sum | 47 ++++- pkg/tracing/client/factory.go | 27 +-- pkg/tracing/google_cloud/google_cloud.go | 85 +++++++++ pkg/tracing/google_cloud/google_cloud_test.go | 164 +++++++++++++++++ pkg/tracing/grpc.go | 2 + pkg/tracing/http.go | 19 +- pkg/tracing/migration/bridge.go | 129 ++++++++++++++ pkg/tracing/migration/sampler.go | 47 +++++ pkg/tracing/stackdriver/tracer_test.go | 165 ------------------ scripts/cfggen/main.go | 10 +- 13 files changed, 529 insertions(+), 190 deletions(-) create mode 100644 pkg/tracing/google_cloud/google_cloud.go create mode 100644 pkg/tracing/google_cloud/google_cloud_test.go create mode 100644 pkg/tracing/migration/bridge.go create mode 100644 pkg/tracing/migration/sampler.go delete mode 100644 pkg/tracing/stackdriver/tracer_test.go diff --git a/docs/tracing.md b/docs/tracing.md index 59a4b30b52f..df8558d8848 100644 --- a/docs/tracing.md +++ b/docs/tracing.md @@ -97,12 +97,16 @@ config: traceid_128bit: false ``` -### Stackdriver +### Google Cloud (formerly Stackdriver) Client for https://cloud.google.com/trace/ tracing. -```yaml mdox-exec="go run scripts/cfggen/main.go --name=stackdriver.Config" -type: STACKDRIVER +You will also need to ensure that the authentication with the API is working, follow [this guide](https://cloud.google.com/trace/docs/setup/go-ot#configure_your_platform) to set it up. + +*Note:* The `type` in the configuration below can have either value `GOOGLE_CLOUD` or `STACKDRIVER` - this is to ensure backwards compatibility. 
+ +```yaml mdox-exec="go run scripts/cfggen/main.go --name=google_cloud.Config" +type: GOOGLE_CLOUD config: service_name: "" project_id: "" diff --git a/examples/interactive/interactive_test.go b/examples/interactive/interactive_test.go index 182fc922903..a635384cf48 100644 --- a/examples/interactive/interactive_test.go +++ b/examples/interactive/interactive_test.go @@ -143,7 +143,7 @@ func TestReadOnlyThanosSetup(t *testing.T) { testutil.Ok(t, e2e.StartAndWaitReady(j)) jaegerConfig, err := yaml.Marshal(tracingclient.TracingConfig{ - Type: tracingclient.JAEGER, + Type: tracingclient.Jaeger, Config: jaeger.Config{ ServiceName: "thanos", SamplerType: "const", diff --git a/go.mod b/go.mod index d1c2b2400ba..8d11fb391ab 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/Azure/azure-storage-blob-go v0.13.0 github.com/Azure/go-autorest/autorest/adal v0.9.17 github.com/Azure/go-autorest/autorest/azure/auth v0.5.8 + github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/trace v1.0.0 github.com/NYTimes/gziphandler v1.1.1 github.com/alecthomas/units v0.0.0-20210927113745-59d0afb8317a github.com/alicebob/miniredis/v2 v2.14.3 @@ -74,6 +75,11 @@ require ( github.com/weaveworks/common v0.0.0-20210913144402-035033b78a78 go.elastic.co/apm v1.11.0 go.elastic.co/apm/module/apmot v1.11.0 + go.opentelemetry.io/contrib/propagators/ot v1.4.0 + go.opentelemetry.io/otel v1.5.0 + go.opentelemetry.io/otel/bridge/opentracing v1.5.0 + go.opentelemetry.io/otel/sdk v1.5.0 + go.opentelemetry.io/otel/trace v1.5.0 go.uber.org/atomic v1.9.0 go.uber.org/automaxprocs v1.4.0 go.uber.org/goleak v1.1.12 @@ -132,8 +138,10 @@ require ( github.com/elastic/go-windows v1.0.1 // indirect github.com/envoyproxy/go-control-plane v0.10.1 // indirect github.com/envoyproxy/protoc-gen-validate v0.6.2 // indirect - github.com/felixge/httpsnoop v1.0.1 // indirect + github.com/felixge/httpsnoop v1.0.2 // indirect github.com/go-logfmt/logfmt v0.5.1 // indirect + github.com/go-logr/logr 
v1.2.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-openapi/analysis v0.20.0 // indirect github.com/go-openapi/errors v0.20.0 // indirect github.com/go-openapi/jsonpointer v0.19.5 // indirect @@ -151,7 +159,7 @@ require ( github.com/gogo/googleapis v1.4.0 // indirect github.com/golang-jwt/jwt/v4 v4.0.0 // indirect github.com/golang/protobuf v1.5.2 // indirect - github.com/google/go-cmp v0.5.6 // indirect + github.com/google/go-cmp v0.5.7 // indirect github.com/google/go-querystring v1.0.0 // indirect github.com/google/pprof v0.0.0-20211008130755-947d60d73cc0 // indirect github.com/google/uuid v1.2.0 // indirect diff --git a/go.sum b/go.sum index a27aef6d506..bb4f8b1440f 100644 --- a/go.sum +++ b/go.sum @@ -24,6 +24,7 @@ cloud.google.com/go v0.81.0/go.mod h1:mk/AM35KwGk/Nm2YSeZbxXdrNK3KZOYHmLkOqC2V6E cloud.google.com/go v0.83.0/go.mod h1:Z7MJUsANfY0pYPdw0lbnivPx4/vhy/e2FEkSkF7vAVY= cloud.google.com/go v0.84.0/go.mod h1:RazrYuxIK6Kb7YrzzhPoLmCVzl7Sup4NrbKPg8KHSUM= cloud.google.com/go v0.87.0/go.mod h1:TpDYlFy7vuLzZMMZ+B6iRiELaY7z/gJPaqbMx6mlWcY= +cloud.google.com/go v0.88.0/go.mod h1:dnKwfYbP9hQhefiUvpbcAyoGSHUrOxR20JVElLiUvEY= cloud.google.com/go v0.90.0/go.mod h1:kRX0mNRHe0e2rC6oNakvwQqzyDmg57xJ+SZU1eT2aDQ= cloud.google.com/go v0.92.2/go.mod h1:8utlLll2EF5XMAV15woO4lSbWQlk8rer9aLOfLh7+YI= cloud.google.com/go v0.93.3/go.mod h1:8utlLll2EF5XMAV15woO4lSbWQlk8rer9aLOfLh7+YI= @@ -123,6 +124,8 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/DATA-DOG/go-sqlmock v1.4.1/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/trace v1.0.0 h1:38fNtfhHY6bs22b/D6+hDzO6JR0rDzpGPD36dY2uPL4= 
+github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/trace v1.0.0/go.mod h1:jE23wM1jvwSKgdGcoOkj5j9n1VWtncW36pL2bK1JU+0= github.com/HdrHistogram/hdrhistogram-go v1.1.0/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo= github.com/HdrHistogram/hdrhistogram-go v1.1.2 h1:5IcZpTvzydCQeHzK4Ef/D5rrSqwxob0t8PQPMybUNFM= github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo= @@ -585,8 +588,9 @@ github.com/fatih/structtag v1.1.0 h1:6j4mUV/ES2duvnAzKMFkN6/A5mCaNYPD3xfbAkLLOF8 github.com/fatih/structtag v1.1.0/go.mod h1:mBJUNpUnHmRKrKlQQlmCrh5PuhftFbNv8Ys4/aAZl94= github.com/felixge/fgprof v0.9.1 h1:E6FUJ2Mlv043ipLOCFqo8+cHo9MhQ203E2cdEK/isEs= github.com/felixge/fgprof v0.9.1/go.mod h1:7/HK6JFtFaARhIljgP2IV8rJLIoHDoOYoUphsnGvqxE= -github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ= github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/felixge/httpsnoop v1.0.2 h1:+nS9g82KMXccJ/wp0zyRW9ZBHFETmMGtkk+2CTTrW4o= +github.com/felixge/httpsnoop v1.0.2/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= github.com/form3tech-oss/jwt-go v3.2.3+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= @@ -632,6 +636,10 @@ github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG github.com/go-logfmt/logfmt v0.5.1 h1:otpy5pqBCBZ1ng9RQ0dPu4PN7ba75Y/aA+UpowDyNVA= github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= +github.com/go-logr/logr v1.2.2 h1:ahHml/yUpnlb96Rp8HCvtYVPY8ZYpxq3g7UYchIYwbs= +github.com/go-logr/logr v1.2.2/go.mod 
h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-openapi/analysis v0.0.0-20180825180245-b006789cd277/go.mod h1:k70tL6pCuVxPJOHXQ+wIac1FUrvNkHolPie/cLEU6hI= github.com/go-openapi/analysis v0.17.0/go.mod h1:IowGgpVeD0vNm45So8nr+IcQ3pxVtpRoBWb8PVZO0ik= github.com/go-openapi/analysis v0.18.0/go.mod h1:IowGgpVeD0vNm45So8nr+IcQ3pxVtpRoBWb8PVZO0ik= @@ -887,8 +895,9 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o= +github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= @@ -916,6 +925,7 @@ github.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLe github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210601050228-01bbb1931b22/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210609004039-a478d1d731e9/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof 
v0.0.0-20210715191844-86eeefc3e471/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20211008130755-947d60d73cc0 h1:zHs+jv3LO743/zFGcByu2KmpbliCU2AhjcGgrdTwSG4= github.com/google/pprof v0.0.0-20211008130755-947d60d73cc0/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg= @@ -938,6 +948,8 @@ github.com/googleapis/gnostic v0.4.1/go.mod h1:LRhVm6pbyptWbWbuZ38d1eyptfvIytN3i github.com/googleapis/gnostic v0.5.1/go.mod h1:6U4PtQXGIEt/Z3h5MAT7FNofLnw9vXk2cUuW7uA/OeU= github.com/googleapis/gnostic v0.5.5 h1:9fHAtK0uDfpveeqqo1hkEZJcFvYXAiCN3UutL8F9xHw= github.com/googleapis/gnostic v0.5.5/go.mod h1:7+EbHbldMins07ALC74bsA81Ovc97DwqyJO1AENw9kA= +github.com/googleinterns/cloud-operations-api-mock v0.0.0-20200709193332-a1e58c29bdd3 h1:eHv/jVY/JNop1xg2J9cBb4EzyMpWZoNCP1BslSAIkOI= +github.com/googleinterns/cloud-operations-api-mock v0.0.0-20200709193332-a1e58c29bdd3/go.mod h1:h/KNeRx7oYU4SpA4SoY7W2/NxDKEEVuwA6j9A27L4OI= github.com/gophercloud/gophercloud v0.6.0/go.mod h1:GICNByuaEBibcjmjvI7QvYJSZEbGkcYwAR7EZK2WMqM= github.com/gophercloud/gophercloud v0.12.0/go.mod h1:gmC5oQqMDOMO1t1gq5DquX/yAU808e/4mzjjDA76+Ss= github.com/gophercloud/gophercloud v0.13.0/go.mod h1:VX0Ibx85B60B5XOrZr6kaNwrmPUzcmMpwxvQ1WQIIWM= @@ -1818,7 +1830,32 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.24.0 h1:qW6j1kJU24yo2xIu16Py4m4AXn1dd+s2uKllGnTFAm0= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.24.0/go.mod h1:7W3JSDYTtH3qKKHrS1fMiwLtK7iZFLPq1+7htfspX/E= +go.opentelemetry.io/contrib/propagators/ot v1.4.0 
h1:sHp8P5+xmMORvsgKjIPPX4U97JUgSqY4xPWa6ncF1PA= +go.opentelemetry.io/contrib/propagators/ot v1.4.0/go.mod h1:FivzsGJqC7ND++UUOifWfkiuEOFXtVQ3fh2ZkqIJ9X4= go.opentelemetry.io/otel v0.11.0/go.mod h1:G8UCk+KooF2HLkgo8RHX9epABH/aRGYET7gQOqBVdB0= +go.opentelemetry.io/otel v1.0.0-RC3/go.mod h1:Ka5j3ua8tZs4Rkq4Ex3hwgBgOchyPVq5S6P2lz//nKQ= +go.opentelemetry.io/otel v1.0.0/go.mod h1:AjRVh9A5/5DE7S+mZtTR6t8vpKKryam+0lREnfmS4cg= +go.opentelemetry.io/otel v1.0.1/go.mod h1:OPEOD4jIT2SlZPMmwT6FqZz2C0ZNdQqiWcoK6M0SNFU= +go.opentelemetry.io/otel v1.4.0/go.mod h1:jeAqMFKy2uLIxCtKxoFj0FAL5zAPKQagc3+GtBWakzk= +go.opentelemetry.io/otel v1.5.0 h1:DhCU8oR2sJH9rfnwPdoV/+BJ7UIN5kXHL8DuSGrPU8E= +go.opentelemetry.io/otel v1.5.0/go.mod h1:Jm/m+rNp/z0eqJc74H7LPwQ3G87qkU/AnnAydAjSAHk= +go.opentelemetry.io/otel/bridge/opentracing v1.5.0 h1:fOaCCGOLhDRea4Hv+P+Z8d4JtPEf3SSuNJFT4diV5v8= +go.opentelemetry.io/otel/bridge/opentracing v1.5.0/go.mod h1:Pci42D1Wz/eZzWeKwGPgqu89bQeak3DdIeZhzGNFu8s= +go.opentelemetry.io/otel/internal/metric v0.23.0 h1:mPfzm9Iqhw7G2nDBmUAjFTfPqLZPbOW2k7QI57ITbaI= +go.opentelemetry.io/otel/internal/metric v0.23.0/go.mod h1:z+RPiDJe30YnCrOhFGivwBS+DU1JU/PiLKkk4re2DNY= +go.opentelemetry.io/otel/metric v0.23.0 h1:mYCcDxi60P4T27/0jchIDFa1WHEfQeU3zH9UEMpnj2c= +go.opentelemetry.io/otel/metric v0.23.0/go.mod h1:G/Nn9InyNnIv7J6YVkQfpc0JCfKBNJaERBGw08nqmVQ= +go.opentelemetry.io/otel/sdk v1.0.1/go.mod h1:HrdXne+BiwsOHYYkBE5ysIcv2bvdZstxzmCQhxTcZkI= +go.opentelemetry.io/otel/sdk v1.5.0 h1:QKhWBbcOC9fDCZKCfPFjWTWpfIlJR+i9xiUDYrLVmZs= +go.opentelemetry.io/otel/sdk v1.5.0/go.mod h1:CU4J1v+7iEljnm1G14QjdFWOXUyYLHVh0Lh+/BTYyFg= +go.opentelemetry.io/otel/trace v1.0.0-RC3/go.mod h1:VUt2TUYd8S2/ZRX09ZDFZQwn2RqfMB5MzO17jBojGxo= +go.opentelemetry.io/otel/trace v1.0.0/go.mod h1:PXTWqayeFUlJV1YDNhsJYB184+IvAH814St6o6ajzIs= +go.opentelemetry.io/otel/trace v1.0.1/go.mod h1:5g4i4fKLaX2BQpSBsxw8YYcgKpMMSW3x7ZTuYBr3sUk= +go.opentelemetry.io/otel/trace v1.4.0/go.mod 
h1:uc3eRsqDfWs9R7b92xbQbU42/eTNz4N+gLP8qJCi4aE= +go.opentelemetry.io/otel/trace v1.5.0 h1:AKQZ9zJsBRFAp7zLdyGNkqG2rToCDIt3i5tcLzQlbmU= +go.opentelemetry.io/otel/trace v1.5.0/go.mod h1:sq55kfhjXYr1zVSyexg0w1mpa03AYXR5eyTkB9NPPdE= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= @@ -2005,6 +2042,7 @@ golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20210520170846-37e1c6afe023/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20210716203947-853a461950ff/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210726213435-c6fcb2dbf985/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= @@ -2161,6 +2199,7 @@ golang.org/x/sys v0.0.0-20210324051608-47abb6519492/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210426230700-d19ff857e887/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys 
v0.0.0-20210503080704-8803ae5d1324/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -2273,6 +2312,7 @@ golang.org/x/tools v0.0.0-20200513201620-d5fe73897c97/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200701151220-7cb253f4c4f8/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200721032237-77f530d86f9a/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= @@ -2388,6 +2428,7 @@ google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfG google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto v0.0.0-20200605102947-12044bf5ea91/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= google.golang.org/genproto v0.0.0-20200710124503-20a17af7bd0e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200724131911-43cab4749ae7/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= @@ -2417,6 
+2458,8 @@ google.golang.org/genproto v0.0.0-20210608205507-b6d2f5bf0d7d/go.mod h1:UODoCrxH google.golang.org/genproto v0.0.0-20210624195500-8bfb893ecb84/go.mod h1:SzzZ/N+nwJDaO1kznhnlzqS8ocJICar6hYhVyhi++24= google.golang.org/genproto v0.0.0-20210713002101-d411969a0d9a/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k= google.golang.org/genproto v0.0.0-20210716133855-ce7ef5c701ea/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k= +google.golang.org/genproto v0.0.0-20210721163202-f1cecdd8b78a/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= +google.golang.org/genproto v0.0.0-20210722135532-667f2b7c528f/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= google.golang.org/genproto v0.0.0-20210728212813-7823e685a01f/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= google.golang.org/genproto v0.0.0-20210805201207-89edb61ffb67/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= google.golang.org/genproto v0.0.0-20210813162853-db860fec028c/go.mod h1:cFeNkxwySK631ADgubI+/XFU/xp8FD5KIVV4rj8UC5w= diff --git a/pkg/tracing/client/factory.go b/pkg/tracing/client/factory.go index 3de07746be3..455c3afc310 100644 --- a/pkg/tracing/client/factory.go +++ b/pkg/tracing/client/factory.go @@ -16,18 +16,20 @@ import ( "gopkg.in/yaml.v2" "github.com/thanos-io/thanos/pkg/tracing/elasticapm" + "github.com/thanos-io/thanos/pkg/tracing/google_cloud" "github.com/thanos-io/thanos/pkg/tracing/jaeger" "github.com/thanos-io/thanos/pkg/tracing/lightstep" - "github.com/thanos-io/thanos/pkg/tracing/stackdriver" + "github.com/thanos-io/thanos/pkg/tracing/migration" ) type TracingProvider string const ( - STACKDRIVER TracingProvider = "STACKDRIVER" - JAEGER TracingProvider = "JAEGER" - ELASTIC_APM TracingProvider = "ELASTIC_APM" - LIGHTSTEP TracingProvider = "LIGHTSTEP" + Stackdriver TracingProvider = "STACKDRIVER" + GoogleCloud TracingProvider = "GOOGLE_CLOUD" + Jaeger TracingProvider = "JAEGER" + ElasticAPM TracingProvider = "ELASTIC_APM" + Lightstep TracingProvider = 
"LIGHTSTEP" ) type TracingConfig struct { @@ -53,13 +55,18 @@ func NewTracer(ctx context.Context, logger log.Logger, metrics *prometheus.Regis } switch strings.ToUpper(string(tracingConf.Type)) { - case string(STACKDRIVER): - return stackdriver.NewTracer(ctx, logger, config) - case string(JAEGER): + case string(Stackdriver), string(GoogleCloud): + tracerProvider, err := google_cloud.NewTracerProvider(ctx, logger, config) + if err != nil { + return nil, nil, err + } + tracer, closerFunc := migration.Bridge(tracerProvider, logger) + return tracer, closerFunc, nil + case string(Jaeger): return jaeger.NewTracer(ctx, logger, metrics, config) - case string(ELASTIC_APM): + case string(ElasticAPM): return elasticapm.NewTracer(config) - case string(LIGHTSTEP): + case string(Lightstep): return lightstep.NewTracer(ctx, config) default: return nil, nil, errors.Errorf("tracing with type %s is not supported", tracingConf.Type) diff --git a/pkg/tracing/google_cloud/google_cloud.go b/pkg/tracing/google_cloud/google_cloud.go new file mode 100644 index 00000000000..29fb758a6c4 --- /dev/null +++ b/pkg/tracing/google_cloud/google_cloud.go @@ -0,0 +1,85 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package google_cloud + +import ( + "context" + "os" + + cloudtrace "github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/trace" + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/prometheus/common/version" + "github.com/thanos-io/thanos/pkg/tracing/migration" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/resource" + tracesdk "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.4.0" + "gopkg.in/yaml.v2" +) + +// Config - YAML configuration. +type Config struct { + ServiceName string `yaml:"service_name"` + ProjectId string `yaml:"project_id"` + SampleFactor uint64 `yaml:"sample_factor"` +} + +// NewTracerProvider create tracer provider from YAML. 
+func NewTracerProvider(ctx context.Context, logger log.Logger, conf []byte) (*tracesdk.TracerProvider, error) { + config := Config{} + if err := yaml.Unmarshal(conf, &config); err != nil { + return nil, err + } + + exporter, err := cloudtrace.New( + cloudtrace.WithContext(ctx), + cloudtrace.WithProjectID(config.ProjectId), + ) + if err != nil { + return nil, err + } + + return newTracerProvider(ctx, logger, tracesdk.NewBatchSpanProcessor(exporter), + config.SampleFactor, config.ServiceName), nil +} + +func newTracerProvider(ctx context.Context, logger log.Logger, processor tracesdk.SpanProcessor, sampleFactor uint64, serviceName string) *tracesdk.TracerProvider { + // Even if resource.New returns error, the resource will be valid - log the error and continue. + resource, err := resource.New(ctx, resource.WithAttributes(collectAttributes(serviceName)...)) + if err != nil { + level.Warn(logger).Log("msg", "detecting resources for tracing provider failed", "err", err) + } + + fraction := 1 / float64(sampleFactor) + if sampleFactor == 0 { + fraction = 0 + } + + tp := tracesdk.NewTracerProvider( + tracesdk.WithSpanProcessor(processor), + tracesdk.WithSampler( + migration.SamplerWithOverride( + tracesdk.ParentBased(tracesdk.TraceIDRatioBased(fraction)), + migration.ForceTracingAttributeKey, + ), + ), + tracesdk.WithResource(resource), + ) + + return tp +} + +func collectAttributes(serviceName string) []attribute.KeyValue { + attr := []attribute.KeyValue{ + semconv.ServiceNameKey.String(serviceName), + attribute.String("binary_revision", version.Revision), + } + + if len(os.Args) > 1 { + attr = append(attr, attribute.String("binary_cmd", os.Args[1])) + } + + return attr +} diff --git a/pkg/tracing/google_cloud/google_cloud_test.go b/pkg/tracing/google_cloud/google_cloud_test.go new file mode 100644 index 00000000000..69ab9ec59c4 --- /dev/null +++ b/pkg/tracing/google_cloud/google_cloud_test.go @@ -0,0 +1,164 @@ +// Copyright (c) The Thanos Authors. 
+// Licensed under the Apache License 2.0. + +// This file includes unit tests that test only tiny logic in this package, but are here mainly as a showcase on how tracing can +// be configured. + +package google_cloud + +import ( + "context" + "testing" + + "github.com/go-kit/log" + "github.com/opentracing/opentracing-go" + "github.com/thanos-io/thanos/pkg/testutil" + "github.com/thanos-io/thanos/pkg/tracing" + "github.com/thanos-io/thanos/pkg/tracing/migration" + tracesdk "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/sdk/trace/tracetest" +) + +func TestMain(m *testing.M) { + testutil.TolerantVerifyLeakMain(m) +} + +// This test shows that if sample factor will enable tracing on client process, even when it would be disabled on server +// it will be still enabled for all spans within this span. +func TestContextTracing_ClientEnablesTracing(t *testing.T) { + exp := tracetest.NewInMemoryExporter() + tracerOtel := newTracerProvider( + context.Background(), + log.NewNopLogger(), + tracesdk.NewSimpleSpanProcessor(exp), + 1, // always sample + "gcloud-test-client", + ) + tracer, _ := migration.Bridge(tracerOtel, log.NewNopLogger()) + + clientRoot, clientCtx := tracing.StartSpan(tracing.ContextWithTracer(context.Background(), tracer), "a") + + // Simulate Server process with different tracer, but with client span in context. 
+ srvTracerOtel := newTracerProvider( + context.Background(), + log.NewNopLogger(), + tracesdk.NewSimpleSpanProcessor(exp), + 0, // never sample + "gcloud-test-server", + ) + srvTracer, _ := migration.Bridge(srvTracerOtel, log.NewNopLogger()) + + srvRoot, srvCtx := tracing.StartSpan(tracing.ContextWithTracer(clientCtx, srvTracer), "b") + srvChild, _ := tracing.StartSpan(srvCtx, "bb") + testutil.Equals(t, 0, len(exp.GetSpans())) + + srvChild.Finish() + testutil.Equals(t, 1, len(exp.GetSpans())) + testutil.Equals(t, 1, countSampledSpans(exp.GetSpans())) + + srvRoot.Finish() + testutil.Equals(t, 2, len(exp.GetSpans())) + testutil.Equals(t, 2, countSampledSpans(exp.GetSpans())) + + clientRoot.Finish() + testutil.Equals(t, 3, len(exp.GetSpans())) + testutil.Equals(t, 3, countSampledSpans(exp.GetSpans())) +} + +// This test shows that if sample factor will disable tracing on client process, when it would be enabled on server +// it will be still disabled for all spans within this span. +func TestContextTracing_ClientDisablesTracing(t *testing.T) { + exp := tracetest.NewInMemoryExporter() + tracerOtel := newTracerProvider( + context.Background(), + log.NewNopLogger(), + tracesdk.NewSimpleSpanProcessor(exp), + 0, // never sample + "gcloud-test-client", + ) + tracer, _ := migration.Bridge(tracerOtel, log.NewNopLogger()) + + clientRoot, clientCtx := tracing.StartSpan(tracing.ContextWithTracer(context.Background(), tracer), "a") + + // Simulate Server process with different tracer, but with client span in context. 
+ srvTracerOtel := newTracerProvider( + context.Background(), + log.NewNopLogger(), + tracesdk.NewSimpleSpanProcessor(exp), + 0, // never sample + "gcloud-test-server", + ) + srvTracer, _ := migration.Bridge(srvTracerOtel, log.NewNopLogger()) + + srvRoot, srvCtx := tracing.StartSpan(tracing.ContextWithTracer(clientCtx, srvTracer), "b") + srvChild, _ := tracing.StartSpan(srvCtx, "bb") + testutil.Equals(t, 0, len(exp.GetSpans())) + + // Since we are not recording neither sampling, no spans should show up. + srvChild.Finish() + testutil.Equals(t, 0, len(exp.GetSpans())) + + srvRoot.Finish() + testutil.Equals(t, 0, len(exp.GetSpans())) + + clientRoot.Finish() + testutil.Equals(t, 0, len(exp.GetSpans())) +} + +// This test shows that if span will contain special baggage (for example from special HTTP header), even when sample +// factor will disable client & server tracing, it will be still enabled for all spans within this span. +func TestContextTracing_ForceTracing(t *testing.T) { + exp := tracetest.NewInMemoryExporter() + tracerOtel := newTracerProvider( + context.Background(), + log.NewNopLogger(), + tracesdk.NewSimpleSpanProcessor(exp), + 0, // never sample + "gcloud-test-client", + ) + tracer, _ := migration.Bridge(tracerOtel, log.NewNopLogger()) + + // Start the root span with the tag to force tracing. + clientRoot, clientCtx := tracing.StartSpan( + tracing.ContextWithTracer(context.Background(), tracer), + "a", + opentracing.Tag{Key: migration.ForceTracingAttributeKey, Value: "true"}, + ) + + // Simulate Server process with different tracer, but with client span in context. 
+ srvTracerOtel := newTracerProvider( + context.Background(), + log.NewNopLogger(), + tracesdk.NewSimpleSpanProcessor(exp), + 0, // never sample + "gcloud-test-server", + ) + srvTracer, _ := migration.Bridge(srvTracerOtel, log.NewNopLogger()) + + srvRoot, srvCtx := tracing.StartSpan(tracing.ContextWithTracer(clientCtx, srvTracer), "b") + srvChild, _ := tracing.StartSpan(srvCtx, "bb") + testutil.Equals(t, 0, len(exp.GetSpans())) + + srvChild.Finish() + testutil.Equals(t, 1, len(exp.GetSpans())) + testutil.Equals(t, 1, countSampledSpans(exp.GetSpans())) + + srvRoot.Finish() + testutil.Equals(t, 2, len(exp.GetSpans())) + testutil.Equals(t, 2, countSampledSpans(exp.GetSpans())) + + clientRoot.Finish() + testutil.Equals(t, 3, len(exp.GetSpans())) + testutil.Equals(t, 3, countSampledSpans(exp.GetSpans())) +} + +func countSampledSpans(ss tracetest.SpanStubs) int { + var count int + for _, s := range ss { + if s.SpanContext.IsSampled() { + count++ + } + } + + return count +} diff --git a/pkg/tracing/grpc.go b/pkg/tracing/grpc.go index 78b4391bf8a..2f638ed2355 100644 --- a/pkg/tracing/grpc.go +++ b/pkg/tracing/grpc.go @@ -6,6 +6,7 @@ package tracing import ( "context" + "github.com/davecgh/go-spew/spew" grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware/v2" grpc_opentracing "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/tracing" opentracing "github.com/opentracing/opentracing-go" @@ -37,6 +38,7 @@ func StreamServerInterceptor(tracer opentracing.Tracer) grpc.StreamServerInterce return func(srv interface{}, stream grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error { // Add our own tracer. 
wrappedStream := grpc_middleware.WrapServerStream(stream) + spew.Println("wrapped ctx", stream.Context()) wrappedStream.WrappedContext = ContextWithTracer(stream.Context(), tracer) return interceptor(srv, wrappedStream, info, handler) diff --git a/pkg/tracing/http.go b/pkg/tracing/http.go index bb906eb54a0..21b6663f4f0 100644 --- a/pkg/tracing/http.go +++ b/pkg/tracing/http.go @@ -14,6 +14,7 @@ import ( "github.com/go-kit/log/level" "github.com/opentracing/opentracing-go" "github.com/opentracing/opentracing-go/ext" + "github.com/thanos-io/thanos/pkg/tracing/migration" ) // HTTPMiddleware returns an HTTP handler that injects the given tracer and starts a new server span. @@ -22,7 +23,6 @@ func HTTPMiddleware(tracer opentracing.Tracer, name string, logger log.Logger, n operationName := fmt.Sprintf("/%s HTTP[server]", name) return func(w http.ResponseWriter, r *http.Request) { - var span opentracing.Span wireContext, err := tracer.Extract( opentracing.HTTPHeaders, opentracing.HTTPHeadersCarrier(r.Header), @@ -31,7 +31,17 @@ func HTTPMiddleware(tracer opentracing.Tracer, name string, logger log.Logger, n level.Error(logger).Log("msg", "failed to extract tracer from request", "operationName", operationName, "err", err) } - span = tracer.StartSpan(operationName, ext.RPCServerOption(wireContext)) + opts := []opentracing.StartSpanOption{ext.RPCServerOption(wireContext)} + // Check for force tracing header and add it as a tag at the start of span. + // This is required for the OpenTelemetry sampler to force tracing. 
+ if r.Header.Get(ForceTracingBaggageKey) != "" { + opts = append(opts, opentracing.Tag{Key: migration.ForceTracingAttributeKey, Value: "true"}) + } + + span := tracer.StartSpan( + operationName, + opts..., + ) ext.HTTPMethod.Set(span, r.Method) ext.HTTPUrl.Set(span, r.URL.String()) @@ -42,6 +52,11 @@ func HTTPMiddleware(tracer opentracing.Tracer, name string, logger log.Logger, n if traceID, ok := t.GetTraceIDFromSpanContext(span.Context()); ok { w.Header().Set(traceIDResponseHeader, traceID) } + } else { + // Alternative to set trace ID header, if bridge tracer is being used. + if traceID, ok := migration.GetTraceIDFromBridgeSpan(span); ok { + w.Header().Set(traceIDResponseHeader, traceID) + } } next.ServeHTTP(w, r.WithContext(opentracing.ContextWithSpan(ContextWithTracer(r.Context(), tracer), span))) diff --git a/pkg/tracing/migration/bridge.go b/pkg/tracing/migration/bridge.go new file mode 100644 index 00000000000..4ace2e85c57 --- /dev/null +++ b/pkg/tracing/migration/bridge.go @@ -0,0 +1,129 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package migration + +import ( + "context" + "io" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/opentracing/opentracing-go" + ot_propagator "go.opentelemetry.io/contrib/propagators/ot" + "go.opentelemetry.io/otel" + bridge "go.opentelemetry.io/otel/bridge/opentracing" + "go.opentelemetry.io/otel/propagation" + tracesdk "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/trace" +) + +// Bridge is a method to facilitate migration from OpenTracing (OT) to +// OpenTelemetry (OTEL). It pairs an OTEL tracer with a so-called bridge +// tracer, which satisfies the OT Tracer interface. This makes it possible +// for OT instrumentation to work with an OTEL tracer. +// +// NOTE: After instrumentation migration is finished, this bridge should be +// removed. 
+func Bridge(tp *tracesdk.TracerProvider, l log.Logger) (opentracing.Tracer, io.Closer) { + compositePropagator := propagation.NewCompositeTextMapPropagator(ot_propagator.OT{}, propagation.TraceContext{}, propagation.Baggage{}) + otel.SetErrorHandler(otelErrHandler(func(err error) { + level.Error(l).Log("msg", "OpenTelemetry ErrorHandler", "err", err) + })) + otel.SetTextMapPropagator(compositePropagator) + otel.SetTracerProvider(tp) + + bridgeTracer, _ := bridge.NewTracerPair(tp.Tracer("")) + bridgeTracer.SetWarningHandler(func(warn string) { + level.Warn(l).Log("msg", "OpenTelemetry BridgeWarningHandler", "warn", warn) + }) + bridgeTracer.SetTextMapPropagator(propagation.TraceContext{}) + + tpShutdownFunc := func() error { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + return tp.Shutdown(ctx) + } + + return &bridgeTracerWrapper{bt: bridgeTracer}, shutdownAsCloser(tpShutdownFunc) +} + +func GetTraceIDFromBridgeSpan(span opentracing.Span) (string, bool) { + ctx := bridge.NewBridgeTracer().ContextWithSpanHook(context.Background(), span) + otelSpan := trace.SpanFromContext(ctx) + if otelSpan.SpanContext().IsSampled() && otelSpan.SpanContext().IsValid() { + return otelSpan.SpanContext().TraceID().String(), true + } + + return "", false +} + +type otelErrHandler func(err error) + +func (o otelErrHandler) Handle(err error) { + o(err) +} + +// Workaround to satisfy io.Closer interface. +type shutdownAsCloser func() error + +func (s shutdownAsCloser) Close() error { + return s() +} + +// This wrapper is necessary to enable proper trace propagation for gRPC +// calls between components. The bridge.BridgeTracer currently supports injection / +// extraction of only single carrier type which is opentracing.HTTPHeadersCarrier. 
+// (see https://github.com/open-telemetry/opentelemetry-go/blob/main/bridge/opentracing/bridge.go#L626) +// +// To work around this, this wrapper extends Inject / Extract methods to "convert" +// other carrier types to opentracing.HTTPHeadersCarrier, in order to propagate +// data correctly. This is currently, at minimum, required for proper functioning +// of propagation in the gRPC middleware, which uses metadata.MD as a carrier. +// (see https://github.com/grpc-ecosystem/go-grpc-middleware/blob/v2.0.0-rc.2/interceptors/tracing/client.go#L95) +type bridgeTracerWrapper struct { + bt *bridge.BridgeTracer +} + +func (b *bridgeTracerWrapper) StartSpan(operationName string, opts ...opentracing.StartSpanOption) opentracing.Span { + return b.bt.StartSpan(operationName, opts...) +} + +func (b *bridgeTracerWrapper) Inject(sm opentracing.SpanContext, format interface{}, carrier interface{}) error { + otCarrier := opentracing.HTTPHeadersCarrier{} + err := b.bt.Inject(sm, format, otCarrier) + if err != nil { + return err + } + + if tmw, ok := carrier.(opentracing.TextMapWriter); ok { + err := otCarrier.ForeachKey(func(key, val string) error { + tmw.Set(key, val) + return nil + }) + if err != nil { + return err + } + } + + return b.bt.Inject(sm, format, carrier) +} + +func (b *bridgeTracerWrapper) Extract(format interface{}, carrier interface{}) (opentracing.SpanContext, error) { + if tmr, ok := carrier.(opentracing.TextMapReader); ok { + otCarrier := opentracing.HTTPHeadersCarrier{} + err := tmr.ForeachKey(func(key, val string) error { + otCarrier.Set(key, val) + return nil + }) + if err != nil { + return nil, err + } + + return b.bt.Extract(format, otCarrier) + } + + return b.bt.Extract(format, carrier) +} diff --git a/pkg/tracing/migration/sampler.go b/pkg/tracing/migration/sampler.go new file mode 100644 index 00000000000..10e75d59217 --- /dev/null +++ b/pkg/tracing/migration/sampler.go @@ -0,0 +1,47 @@ +// Copyright (c) The Thanos Authors. 
+// Licensed under the Apache License 2.0. + +package migration + +import ( + "fmt" + + "go.opentelemetry.io/otel/attribute" + tracesdk "go.opentelemetry.io/otel/sdk/trace" +) + +// ForceTracingAttributeKey is used to signalize a span should be traced. +const ForceTracingAttributeKey = "thanos.force_tracing" + +type samplerWithOverride struct { + baseSampler tracesdk.Sampler + overrideKey attribute.Key +} + +// SamplerWithOverride creates a new sampler with the capability to override +// the sampling decision, if the span includes an attribute with the specified key. +// Otherwise the sampler delegates the decision to the wrapped base sampler. This +// is primarily used to enable forced tracing in Thanos components. +// Implements go.opentelemetry.io/otel/sdk/trace.Sampler interface. +func SamplerWithOverride(baseSampler tracesdk.Sampler, overrideKey attribute.Key) tracesdk.Sampler { + return samplerWithOverride{ + baseSampler, + overrideKey, + } +} + +func (s samplerWithOverride) ShouldSample(p tracesdk.SamplingParameters) tracesdk.SamplingResult { + for _, attr := range p.Attributes { + if attr.Key == s.overrideKey { + return tracesdk.SamplingResult{ + Decision: tracesdk.RecordAndSample, + } + } + } + + return s.baseSampler.ShouldSample(p) +} + +func (s samplerWithOverride) Description() string { + return fmt.Sprintf("SamplerWithOverride{%s}", string(s.overrideKey)) +} diff --git a/pkg/tracing/stackdriver/tracer_test.go b/pkg/tracing/stackdriver/tracer_test.go deleted file mode 100644 index f869e090fdd..00000000000 --- a/pkg/tracing/stackdriver/tracer_test.go +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright (c) The Thanos Authors. -// Licensed under the Apache License 2.0. - -// This file includes unit tests that test only tiny logic in this package, but are here mainly as a showcase on how tracing can -// be configured. 
- -package stackdriver - -import ( - "context" - "testing" - - "github.com/thanos-io/thanos/pkg/testutil" - "github.com/thanos-io/thanos/pkg/tracing" - - "github.com/opentracing/basictracer-go" -) - -func TestMain(m *testing.M) { - testutil.TolerantVerifyLeakMain(m) -} - -// This test shows that if sample factor will enable tracing on client process, even when it would be disabled on server -// it will be still enabled for all spans within this span. -func TestContextTracing_ClientEnablesTracing(t *testing.T) { - m := &basictracer.InMemorySpanRecorder{} - r := &forceRecorder{wrapped: m} - - clientTracer := &tracer{ - serviceName: "Test", - wrapped: basictracer.NewWithOptions(basictracer.Options{ - ShouldSample: func(traceID uint64) bool { - return true - }, - Recorder: r, - MaxLogsPerSpan: 100, - }), - } - - clientRoot, clientCtx := tracing.StartSpan(tracing.ContextWithTracer(context.Background(), clientTracer), "a") - - // Simulate Server process with different tracer, but with client span in context. - srvTracer := &tracer{ - serviceName: "Test", - wrapped: basictracer.NewWithOptions(basictracer.Options{ - ShouldSample: func(traceID uint64) bool { - return false - }, - Recorder: r, - MaxLogsPerSpan: 100, - }), - } - srvRoot, srvCtx := tracing.StartSpan(tracing.ContextWithTracer(clientCtx, srvTracer), "b") - srvChild, _ := tracing.StartSpan(srvCtx, "bb") - testutil.Equals(t, 0, len(m.GetSpans())) - - srvChild.Finish() - testutil.Equals(t, 1, len(m.GetSpans())) - testutil.Equals(t, 1, len(m.GetSampledSpans())) - - srvRoot.Finish() - testutil.Equals(t, 2, len(m.GetSpans())) - testutil.Equals(t, 2, len(m.GetSampledSpans())) - - clientRoot.Finish() - testutil.Equals(t, 3, len(m.GetSpans())) - testutil.Equals(t, 3, len(m.GetSampledSpans())) -} - -// This test shows that if sample factor will disable tracing on client process, when it would be enabled on server -// it will be still disabled for all spans within this span. 
-func TestContextTracing_ClientDisablesTracing(t *testing.T) { - m := &basictracer.InMemorySpanRecorder{} - r := &forceRecorder{wrapped: m} - - clientTracer := &tracer{ - serviceName: "Test", - wrapped: basictracer.NewWithOptions(basictracer.Options{ - ShouldSample: func(traceID uint64) bool { - return false - }, - Recorder: r, - MaxLogsPerSpan: 100, - }), - } - - clientRoot, clientCtx := tracing.StartSpan(tracing.ContextWithTracer(context.Background(), clientTracer), "a") - - // Simulate Server process with different tracer, but with client span in context. - srvTracer := &tracer{ - serviceName: "Test", - wrapped: basictracer.NewWithOptions(basictracer.Options{ - ShouldSample: func(traceID uint64) bool { - return true - }, - Recorder: r, - MaxLogsPerSpan: 100, - }), - } - srvRoot, srvCtx := tracing.StartSpan(tracing.ContextWithTracer(clientCtx, srvTracer), "b") - srvChild, _ := tracing.StartSpan(srvCtx, "bb") - testutil.Equals(t, 0, len(m.GetSpans())) - - srvChild.Finish() - testutil.Equals(t, 1, len(m.GetSpans())) - testutil.Equals(t, 0, len(m.GetSampledSpans())) - - srvRoot.Finish() - testutil.Equals(t, 2, len(m.GetSpans())) - testutil.Equals(t, 0, len(m.GetSampledSpans())) - - clientRoot.Finish() - testutil.Equals(t, 3, len(m.GetSpans())) - testutil.Equals(t, 0, len(m.GetSampledSpans())) -} - -// This test shows that if span will contain special baggage (for example from special HTTP header), even when sample -// factor will disable client & server tracing, it will be still enabled for all spans within this span. 
-func TestContextTracing_ForceTracing(t *testing.T) { - m := &basictracer.InMemorySpanRecorder{} - r := &forceRecorder{wrapped: m} - - clientTracer := &tracer{ - serviceName: "Test", - wrapped: basictracer.NewWithOptions(basictracer.Options{ - ShouldSample: func(traceID uint64) bool { - return false - }, - Recorder: r, - MaxLogsPerSpan: 100, - }), - } - - clientRoot, clientCtx := tracing.StartSpan(tracing.ContextWithTracer(context.Background(), clientTracer), "a") - - // Force tracing for this span and its children. - clientRoot.SetBaggageItem(tracing.ForceTracingBaggageKey, "Go for it") - - // Simulate Server process with different tracer, but with client span in context. - srvTracer := &tracer{ - serviceName: "Test", - wrapped: basictracer.NewWithOptions(basictracer.Options{ - ShouldSample: func(traceID uint64) bool { - return false - }, - Recorder: r, - MaxLogsPerSpan: 100, - }), - } - srvRoot, srvCtx := tracing.StartSpan(tracing.ContextWithTracer(clientCtx, srvTracer), "b") - srvChild, _ := tracing.StartSpan(srvCtx, "bb") - testutil.Equals(t, 0, len(m.GetSpans())) - - srvChild.Finish() - testutil.Equals(t, 1, len(m.GetSpans())) - testutil.Equals(t, 1, len(m.GetSampledSpans())) - - srvRoot.Finish() - testutil.Equals(t, 2, len(m.GetSpans())) - testutil.Equals(t, 2, len(m.GetSampledSpans())) - - clientRoot.Finish() - testutil.Equals(t, 3, len(m.GetSpans())) - testutil.Equals(t, 3, len(m.GetSampledSpans())) -} diff --git a/scripts/cfggen/main.go b/scripts/cfggen/main.go index 265d01c611c..9571f159f1a 100644 --- a/scripts/cfggen/main.go +++ b/scripts/cfggen/main.go @@ -36,9 +36,9 @@ import ( storecache "github.com/thanos-io/thanos/pkg/store/cache" trclient "github.com/thanos-io/thanos/pkg/tracing/client" "github.com/thanos-io/thanos/pkg/tracing/elasticapm" + "github.com/thanos-io/thanos/pkg/tracing/google_cloud" "github.com/thanos-io/thanos/pkg/tracing/jaeger" "github.com/thanos-io/thanos/pkg/tracing/lightstep" - "github.com/thanos-io/thanos/pkg/tracing/stackdriver" 
) var ( @@ -57,10 +57,10 @@ var ( } tracingConfigs = map[trclient.TracingProvider]interface{}{ - trclient.JAEGER: jaeger.Config{}, - trclient.STACKDRIVER: stackdriver.Config{}, - trclient.ELASTIC_APM: elasticapm.Config{}, - trclient.LIGHTSTEP: lightstep.Config{}, + trclient.Jaeger: jaeger.Config{}, + trclient.GoogleCloud: google_cloud.Config{}, + trclient.ElasticAPM: elasticapm.Config{}, + trclient.Lightstep: lightstep.Config{}, } indexCacheConfigs = map[storecache.IndexCacheProvider]interface{}{ storecache.INMEMORY: storecache.InMemoryIndexCacheConfig{}, From cb96cf9e110539fa84fa562d7c90d5745da02b76 Mon Sep 17 00:00:00 2001 From: Matej Gera <38492574+matej-g@users.noreply.github.com> Date: Fri, 18 Mar 2022 01:57:42 +0100 Subject: [PATCH 20/24] Ruler: Fix WAL directory in stateless mode (#5242) --- CHANGELOG.md | 2 ++ cmd/thanos/rule.go | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44721110d47..5294355905a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ### Fixed +- [#5242](https://github.com/thanos-io/thanos/pull/5242) Ruler: Make ruler use the correct WAL directory. + ### Added - [#5220](https://github.com/thanos-io/thanos/pull/5220) Query Frontend: Add `--query-frontend.forward-header` flag, forward headers to downstream querier. diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index 47414f495f0..e73d13c583a 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -357,11 +357,10 @@ func runRule( return errors.Wrapf(err, "failed to parse remote write config %v", string(rwCfgYAML)) } - walDir := filepath.Join(conf.dataDir, "wal") // flushDeadline is set to 1m, but it is for metadata watcher only so not used here. 
remoteStore := remote.NewStorage(logger, reg, func() (int64, error) { return 0, nil - }, walDir, 1*time.Minute, nil) + }, conf.dataDir, 1*time.Minute, nil) if err := remoteStore.ApplyConfig(&config.Config{ GlobalConfig: config.GlobalConfig{ ExternalLabels: labelsTSDBToProm(conf.lset), @@ -371,7 +370,7 @@ func runRule( return errors.Wrap(err, "applying config to remote storage") } - agentDB, err = agent.Open(logger, reg, remoteStore, walDir, agentOpts) + agentDB, err = agent.Open(logger, reg, remoteStore, conf.dataDir, agentOpts) if err != nil { return errors.Wrap(err, "start remote write agent db") } From d0a51a86c5dde0304237092b63e45342e83d05f2 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Fri, 18 Mar 2022 00:53:07 -0700 Subject: [PATCH 21/24] switch to prometheus notifier (#5243) Signed-off-by: Ben Ye --- cmd/thanos/rule.go | 5 ++-- pkg/alert/alert.go | 58 +++++------------------------------------ pkg/alert/alert_test.go | 13 ++++----- 3 files changed, 16 insertions(+), 60 deletions(-) diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index e73d13c583a..5fc408cbe62 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -30,6 +30,7 @@ import ( "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" + "github.com/prometheus/prometheus/notifier" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/storage" @@ -465,13 +466,13 @@ func runRule( { // Run rule evaluation and alert notifications. notifyFunc := func(ctx context.Context, expr string, alerts ...*rules.Alert) { - res := make([]*alert.Alert, 0, len(alerts)) + res := make([]*notifier.Alert, 0, len(alerts)) for _, alrt := range alerts { // Only send actually firing alerts. 
if alrt.State == rules.StatePending { continue } - a := &alert.Alert{ + a := ¬ifier.Alert{ StartsAt: alrt.FiredAt, Labels: alrt.Labels, Annotations: alrt.Annotations, diff --git a/pkg/alert/alert.go b/pkg/alert/alert.go index 1d04df8c732..68f6b3fd6dc 100644 --- a/pkg/alert/alert.go +++ b/pkg/alert/alert.go @@ -25,6 +25,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" + "github.com/prometheus/prometheus/notifier" "go.uber.org/atomic" "github.com/thanos-io/thanos/pkg/runutil" @@ -36,53 +37,6 @@ const ( contentTypeJSON = "application/json" ) -// Alert is a generic representation of an alert in the Prometheus eco-system. -type Alert struct { - // Label value pairs for purpose of aggregation, matching, and disposition - // dispatching. This must minimally include an "alertname" label. - Labels labels.Labels `json:"labels"` - - // Extra key/value information which does not define alert identity. - Annotations labels.Labels `json:"annotations"` - - // The known time range for this alert. Start and end time are both optional. - StartsAt time.Time `json:"startsAt,omitempty"` - EndsAt time.Time `json:"endsAt,omitempty"` - GeneratorURL string `json:"generatorURL,omitempty"` -} - -// Name returns the name of the alert. It is equivalent to the "alertname" label. -func (a *Alert) Name() string { - return a.Labels.Get(labels.AlertName) -} - -// Hash returns a hash over the alert. It is equivalent to the alert labels hash. -func (a *Alert) Hash() uint64 { - return a.Labels.Hash() -} - -func (a *Alert) String() string { - s := fmt.Sprintf("%s[%s]", a.Name(), fmt.Sprintf("%016x", a.Hash())[:7]) - if a.Resolved() { - return s + "[resolved]" - } - return s + "[active]" -} - -// Resolved returns true iff the activity interval ended in the past. 
-func (a *Alert) Resolved() bool { - return a.ResolvedAt(time.Now()) -} - -// ResolvedAt returns true off the activity interval ended before -// the given timestamp. -func (a *Alert) ResolvedAt(ts time.Time) bool { - if a.EndsAt.IsZero() { - return false - } - return !a.EndsAt.After(ts) -} - // Queue is a queue of alert notifications waiting to be sent. The queue is consumed in batches // and entries are dropped at the front if it runs full. type Queue struct { @@ -94,7 +48,7 @@ type Queue struct { alertRelabelConfigs []*relabel.Config mtx sync.Mutex - queue []*Alert + queue []*notifier.Alert morec chan struct{} pushed prometheus.Counter @@ -180,7 +134,7 @@ func (q *Queue) Cap() int { // Pop takes a batch of alerts from the front of the queue. The batch size is limited // according to the queues maxBatchSize limit. // It blocks until elements are available or a termination signal is send on termc. -func (q *Queue) Pop(termc <-chan struct{}) []*Alert { +func (q *Queue) Pop(termc <-chan struct{}) []*notifier.Alert { select { case <-termc: return nil @@ -190,7 +144,7 @@ func (q *Queue) Pop(termc <-chan struct{}) []*Alert { q.mtx.Lock() defer q.mtx.Unlock() - as := make([]*Alert, q.maxBatchSize) + as := make([]*notifier.Alert, q.maxBatchSize) n := copy(as, q.queue) q.queue = q.queue[n:] @@ -206,7 +160,7 @@ func (q *Queue) Pop(termc <-chan struct{}) []*Alert { } // Push adds a list of alerts to the queue. -func (q *Queue) Push(alerts []*Alert) { +func (q *Queue) Push(alerts []*notifier.Alert) { if len(alerts) == 0 { return } @@ -332,7 +286,7 @@ func toAPILabels(labels labels.Labels) models.LabelSet { // Send an alert batch to all given Alertmanager clients. // TODO(bwplotka): https://github.com/thanos-io/thanos/issues/660. 
-func (s *Sender) Send(ctx context.Context, alerts []*Alert) { +func (s *Sender) Send(ctx context.Context, alerts []*notifier.Alert) { if len(alerts) == 0 { return } diff --git a/pkg/alert/alert_test.go b/pkg/alert/alert_test.go index 81d47e99439..cca97107e82 100644 --- a/pkg/alert/alert_test.go +++ b/pkg/alert/alert_test.go @@ -17,6 +17,7 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" + "github.com/prometheus/prometheus/notifier" "github.com/thanos-io/thanos/pkg/testutil" ) @@ -28,7 +29,7 @@ func TestQueue_Pop_all_Pushed(t *testing.T) { q := NewQueue(nil, nil, qcapacity, batchsize, nil, nil, nil) for i := 0; i < pushes; i++ { - q.Push([]*Alert{ + q.Push([]*notifier.Alert{ {}, {}, }) @@ -47,7 +48,7 @@ func TestQueue_Pop_all_Pushed(t *testing.T) { func TestQueue_Push_Relabelled(t *testing.T) { q := NewQueue(nil, nil, 10, 10, labels.FromStrings("a", "1", "replica", "A"), []string{"b", "replica"}, nil) - q.Push([]*Alert{ + q.Push([]*notifier.Alert{ {Labels: labels.FromStrings("b", "2", "c", "3")}, {Labels: labels.FromStrings("c", "3")}, {Labels: labels.FromStrings("a", "2")}, @@ -74,7 +75,7 @@ func TestQueue_Push_Relabelled_Alerts(t *testing.T) { }, ) - q.Push([]*Alert{ + q.Push([]*notifier.Alert{ {Labels: labels.FromMap(map[string]string{ "a": "abc", })}, @@ -134,7 +135,7 @@ func TestSenderSendsOk(t *testing.T) { } s := NewSender(nil, nil, []*Alertmanager{NewAlertmanager(nil, poster, time.Minute, APIv1)}) - s.Send(context.Background(), []*Alert{{}, {}}) + s.Send(context.Background(), []*notifier.Alert{{}, {}}) assertSameHosts(t, poster.urls, poster.seen) @@ -161,7 +162,7 @@ func TestSenderSendsOneFails(t *testing.T) { } s := NewSender(nil, nil, []*Alertmanager{NewAlertmanager(nil, poster, time.Minute, APIv1)}) - s.Send(context.Background(), []*Alert{{}, {}}) + s.Send(context.Background(), []*notifier.Alert{{}, {}}) assertSameHosts(t, poster.urls, poster.seen) @@ -182,7 
+183,7 @@ func TestSenderSendsAllFail(t *testing.T) { } s := NewSender(nil, nil, []*Alertmanager{NewAlertmanager(nil, poster, time.Minute, APIv1)}) - s.Send(context.Background(), []*Alert{{}, {}}) + s.Send(context.Background(), []*notifier.Alert{{}, {}}) assertSameHosts(t, poster.urls, poster.seen) From ad98f55f84c879ec6431693a9a52801da7589344 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Fri, 18 Mar 2022 11:03:31 +0200 Subject: [PATCH 22/24] query: promote negative offset and at-modifier to stable (#5244) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's follow https://github.com/prometheus/prometheus/pull/10121 and make these into stable features as well. Signed-off-by: Giedrius Statkevičius --- CHANGELOG.md | 1 + cmd/thanos/query.go | 22 +++++++++------------- docs/components/query.md | 1 - 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5294355905a..737aeee788b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#5218](https://github.com/thanos-io/thanos/pull/5218) Tools: Run bucket downsample tools continuously. - [#5224](https://github.com/thanos-io/thanos/pull/5224) Receive: Remove sort on label hashing - [#5231](https://github.com/thanos-io/thanos/pull/5231) Tools: Bucket verify tool ignores blocks with deletion markers. +- [#5244](https://github.com/thanos-io/thanos/pull/5244) Query: Promote negative offset and `@` modifier to stable features as per Prometheus [#10121](https://github.com/prometheus/prometheus/pull/10121). 
### Removed diff --git a/cmd/thanos/query.go b/cmd/thanos/query.go index 25f7c611333..f40ebed7751 100644 --- a/cmd/thanos/query.go +++ b/cmd/thanos/query.go @@ -161,7 +161,7 @@ func registerQuery(app *extkingpin.App) { enableMetricMetadataPartialResponse := cmd.Flag("metric-metadata.partial-response", "Enable partial response for metric metadata endpoint. --no-metric-metadata.partial-response for disabling."). Hidden().Default("true").Bool() - featureList := cmd.Flag("enable-feature", "Comma separated experimental feature names to enable.The current list of features is "+promqlNegativeOffset+", "+promqlAtModifier+" and "+queryPushdown+".").Default("").Strings() + featureList := cmd.Flag("enable-feature", "Comma separated experimental feature names to enable.The current list of features is "+queryPushdown+".").Default("").Strings() enableExemplarPartialResponse := cmd.Flag("exemplar.partial-response", "Enable partial response for exemplar endpoint. --no-exemplar.partial-response for disabling."). 
Hidden().Default("true").Bool() @@ -182,16 +182,16 @@ func registerQuery(app *extkingpin.App) { return errors.Wrap(err, "parse federation labels") } - var enableNegativeOffset, enableAtModifier, enableQueryPushdown bool + var enableQueryPushdown bool for _, feature := range *featureList { - if feature == promqlNegativeOffset { - enableNegativeOffset = true + if feature == queryPushdown { + enableQueryPushdown = true } if feature == promqlAtModifier { - enableAtModifier = true + level.Warn(logger).Log("msg", "This option for --enable-feature is now permanently enabled and therefore a no-op.", "option", promqlAtModifier) } - if feature == queryPushdown { - enableQueryPushdown = true + if feature == promqlNegativeOffset { + level.Warn(logger).Log("msg", "This option for --enable-feature is now permanently enabled and therefore a no-op.", "option", promqlNegativeOffset) } } @@ -280,8 +280,6 @@ func registerQuery(app *extkingpin.App) { *strictStores, *strictEndpoints, *webDisableCORS, - enableAtModifier, - enableNegativeOffset, enableQueryPushdown, *alertQueryURL, component.Query, @@ -349,8 +347,6 @@ func runQuery( strictStores []string, strictEndpoints []string, disableCORS bool, - enableAtModifier bool, - enableNegativeOffset bool, enableQueryPushdown bool, alertQueryURL string, comp component.Component, @@ -480,8 +476,8 @@ func runQuery( NoStepSubqueryIntervalFn: func(int64) int64 { return defaultEvaluationInterval.Milliseconds() }, - EnableNegativeOffset: enableNegativeOffset, - EnableAtModifier: enableAtModifier, + EnableNegativeOffset: true, + EnableAtModifier: true, } ) diff --git a/docs/components/query.md b/docs/components/query.md index 456a0359d04..ee9b29b21af 100644 --- a/docs/components/query.md +++ b/docs/components/query.md @@ -260,7 +260,6 @@ Flags: in all alerts 'Source' field. --enable-feature= ... Comma separated experimental feature names to enable.The current list of features is - promql-negative-offset, promql-at-modifier and query-pushdown. 
--endpoint= ... Addresses of statically configured Thanos API servers (repeatable). The scheme may be From 9336076185fd688a27222bcc9fca12a504064167 Mon Sep 17 00:00:00 2001 From: Matej Gera <38492574+matej-g@users.noreply.github.com> Date: Fri, 18 Mar 2022 10:31:25 +0100 Subject: [PATCH 23/24] Remove debug line (#5245) Signed-off-by: Matej Gera --- pkg/tracing/grpc.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/pkg/tracing/grpc.go b/pkg/tracing/grpc.go index 2f638ed2355..78b4391bf8a 100644 --- a/pkg/tracing/grpc.go +++ b/pkg/tracing/grpc.go @@ -6,7 +6,6 @@ package tracing import ( "context" - "github.com/davecgh/go-spew/spew" grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware/v2" grpc_opentracing "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/tracing" opentracing "github.com/opentracing/opentracing-go" @@ -38,7 +37,6 @@ func StreamServerInterceptor(tracer opentracing.Tracer) grpc.StreamServerInterce return func(srv interface{}, stream grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error { // Add our own tracer. wrappedStream := grpc_middleware.WrapServerStream(stream) - spew.Println("wrapped ctx", stream.Context()) wrappedStream.WrappedContext = ContextWithTracer(stream.Context(), tracer) return interceptor(srv, wrappedStream, info, handler) From 149e0264e181ac1899a0409a7d50d78c245c18b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Fri, 18 Mar 2022 19:12:40 +0200 Subject: [PATCH 24/24] e2e: fix compact test's flakiness (#5246) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the compact test's by running this sub-test sequentially. The further steps depend on this test's results so it's wrong to run it as a sub-test. 
Signed-off-by: Giedrius Statkevičius --- test/e2e/compact_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/e2e/compact_test.go b/test/e2e/compact_test.go index 96afd971cdc..cd05cd5e42e 100644 --- a/test/e2e/compact_test.go +++ b/test/e2e/compact_test.go @@ -754,7 +754,8 @@ func testCompactWithStoreGateway(t *testing.T, penaltyDedup bool) { testutil.Ok(t, str.WaitSumMetrics(e2e.Equals(0), "thanos_blocks_meta_modified")) } - t.Run("dedup enabled; no delete delay; compactor should work and remove things as expected", func(t *testing.T) { + // dedup enabled; no delete delay; compactor should work and remove things as expected. + { extArgs := []string{"--deduplication.replica-label=replica", "--deduplication.replica-label=rule_replica", "--delete-delay=0s"} if penaltyDedup { extArgs = append(extArgs, "--deduplication.func=penalty") @@ -805,7 +806,7 @@ func testCompactWithStoreGateway(t *testing.T, penaltyDedup bool) { testutil.Ok(t, str.WaitSumMetrics(e2e.Equals(float64(len(rawBlockIDs)+8-18+6-2+2)), "thanos_blocks_meta_synced")) testutil.Ok(t, str.WaitSumMetrics(e2e.Equals(0), "thanos_blocks_meta_sync_failures_total")) testutil.Ok(t, str.WaitSumMetrics(e2e.Equals(0), "thanos_blocks_meta_modified")) - }) + } // Ensure that querying downsampled blocks works. Then delete the raw block and try querying again. {