diff --git a/.circleci/config.yml b/.circleci/config.yml
index 9fd45ea5d6..b333efc6ec 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -8,6 +8,7 @@ jobs:
     working_directory: /go/src/github.com/improbable-eng/thanos
     environment:
       GO111MODULE: 'on'
+      GOBIN: "/go/bin"
     steps:
     - checkout
     - setup_remote_docker:
@@ -48,6 +49,8 @@ jobs:
   cross_build:
     machine: true
    working_directory: /home/circleci/.go_workspace/src/github.com/improbable-eng/thanos
+    environment:
+      GOBIN: "/home/circleci/.go_workspace/go/bin"
     steps:
     - checkout
     - run: make crossbuild
@@ -61,6 +64,8 @@ jobs:
       # Available from https://hub.docker.com/r/circleci/golang/
       - image: circleci/golang:1.12
     working_directory: /go/src/github.com/improbable-eng/thanos
+    environment:
+      GOBIN: "/go/bin"
     steps:
     - checkout
     - setup_remote_docker:
@@ -78,6 +83,8 @@ jobs:
       # Available from https://hub.docker.com/r/circleci/golang/
       - image: circleci/golang:1.12
     working_directory: /go/src/github.com/improbable-eng/thanos
+    environment:
+      GOBIN: "/go/bin"
     steps:
     - checkout
     - setup_remote_docker:
@@ -127,4 +134,4 @@ workflows:
             tags:
               only: /^v[0-9]+(\.[0-9]+){2}(-.+|[^-.]*)$/
             branches:
-              ignore: /.*/
+              ignore: /.*/
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 83ded54b32..12f3a9109d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,3 +17,7 @@ data/
 
 /.idea
 /*.iml
+
+website/public/
+website/docs-pre-processed/
+!website/data
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 03627e3283..6659e22f43 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,39 +11,129 @@ We use *breaking* word for marking changes that are not backward compatible (rel
 
 ## Unreleased
 
-### Added
-- [#811](https://github.com/improbable-eng/thanos/pull/811) Remote write receiver
-- [#798](https://github.com/improbable-eng/thanos/pull/798) Ability to limit the maximum concurrent about of Series() calls in Thanos Store and the maximum amount of samples.
+## [v0.4.0](https://github.com/improbable-eng/thanos/releases/tag/v0.4.0) - 2019.05.03
 
-New options:
+:warning: **IMPORTANT** :warning: This is the last release that supports gossip. From Thanos v0.5.0, gossip will be completely removed.
 
-* `--store.grpc.series-sample-limit` limits the amount of samples that might be retrieved on a single Series() call. By default it is 0. Consider enabling it by setting it to more than 0 if you are running on limited resources.
-* `--store.grpc.series-max-concurrency` limits the number of concurrent Series() calls in Thanos Store. By default it is 20. Considering making it lower or bigger depending on the scale of your deployment.
+This release also disables gossip mode by default for all components.
+See [this](docs/proposals/approved/201809_gossip-removal.md) for more details.
 
-New metrics:
-* `thanos_bucket_store_queries_dropped_total` shows how many queries were dropped due to the samples limit;
-* `thanos_bucket_store_queries_concurrent_max` is a constant metric which shows how many Series() calls can concurrently be executed by Thanos Store;
-* `thanos_bucket_store_queries_in_flight` shows how many queries are currently "in flight" i.e. they are being executed;
-* `thanos_bucket_store_gate_duration_seconds` shows how many seconds it took for queries to pass through the gate in both cases - when that fails and when it does not.
+### Added
 
-New tracing span:
-* `store_query_gate_ismyturn` shows how long it took for a query to pass (or not) through the gate.
+- [thanos.io](https://thanos.io) website & automation :tada:
+- [#1053](https://github.com/improbable-eng/thanos/pull/1053) compactor: Compactor & store gateway now handle incomplete uploads gracefully. Added hard limit on how long block upload can take (30m).
+- [#811](https://github.com/improbable-eng/thanos/pull/811) Remote write receiver component :heart: :heart: thanks to RedHat (@brancz) contribution.
+- [#910](https://github.com/improbable-eng/thanos/pull/910) Query's stores UI page is now sorted by type and old DNS or File SD stores are removed after 5 minutes (configurable via the new `--store.unhealthy-timeout=5m` flag).
+- [#905](https://github.com/improbable-eng/thanos/pull/905) Thanos now supports the Query API `/api/v1/labels`. Note that this API was added in Prometheus v2.6.
+- [#798](https://github.com/improbable-eng/thanos/pull/798) Ability to limit the maximum number of concurrent Series() requests in Thanos Store and the maximum amount of samples we handle.
+- [#1060](https://github.com/improbable-eng/thanos/pull/1060) Allow specifying region attribute in S3 storage configuration.
+
+:warning: **WARNING** :warning: #798 adds a new default limit to Thanos Store: `--store.grpc.series-max-concurrency`. Most likely you will want to make it the same as `--query.max-concurrent` on Thanos Query.
-- [#970](https://github.com/improbable-eng/thanos/pull/970) Added `PartialResponseStrategy` field for `RuleGroups` for `Ruler`.
+
+New options:
+
+  New Store flags:
+
+  * `--store.grpc.series-sample-limit` limits the amount of samples that might be retrieved on a single Series() call. By default it is 0. Consider enabling it by setting it to more than 0 if you are running on limited resources.
+  * `--store.grpc.series-max-concurrency` limits the number of concurrent Series() calls in Thanos Store. By default it is 20. Consider making it lower or higher depending on the scale of your deployment.
+
+  New Store metrics:
+
+  * `thanos_bucket_store_queries_dropped_total` shows how many queries were dropped due to the samples limit;
+  * `thanos_bucket_store_queries_concurrent_max` is a constant metric which shows how many Series() calls can concurrently be executed by Thanos Store;
+  * `thanos_bucket_store_queries_in_flight` shows how many queries are currently "in flight" i.e. they are being executed;
+  * `thanos_bucket_store_gate_duration_seconds` shows how many seconds it took for queries to pass through the gate in both cases - when that fails and when it does not.
+
+  New Store tracing span:
+  * `store_query_gate_ismyturn` shows how long it took for a query to pass (or not) through the gate.
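+
+As a minimal illustration, both limits could be wired on a Store like this (the values are placeholders, not recommendations):
+
+```bash
+thanos store \
+  --objstore.config-file=bucket.yml \
+  --store.grpc.series-sample-limit=120000 \
+  --store.grpc.series-max-concurrency=20
+```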
+
+- [#1016](https://github.com/improbable-eng/thanos/pull/1016) Added option for another DNS resolver (miekg/dns client).
+Note that this is required to have SRV resolution working on [Golang 1.11+ with KubeDNS below v1.14](https://github.com/golang/go/issues/27546)
+
+  New Querier and Ruler flag: `--store.sd-dns-resolver`, which allows specifying the resolver to use: either `golang` or `miekgdns`
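+
+For example, a Querier using SRV-based discovery could opt into the miekg/dns client like this (the SRV record name is a placeholder):
+
+```bash
+thanos query \
+  --store=dnssrv+_grpc._tcp.thanos-store.monitoring.svc \
+  --store.sd-dns-resolver=miekgdns
+```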
+
+- [#986](https://github.com/improbable-eng/thanos/pull/986) Saves some startup & sync time in store gateway, as it no longer needs to compute the index cache from the block index on its own for larger blocks.
+  The store gateway can still do it, but it first checks the bucket for an already uploaded index cache.
+  At the same time, the compactor precomputes the index cache file on every compaction.
+
+  New Compactor flag: `--index.generate-missing-cache-file` was added to allow quicker addition of index cache files. If enabled, it precomputes missing files on compactor startup. Note that this will take time and is only a one-off step per bucket.
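+
+A one-off backfill run could look like this (a sketch; assumes a valid `bucket.yml`):
+
+```bash
+thanos compact \
+  --data-dir=/tmp/thanos-compact \
+  --objstore.config-file=bucket.yml \
+  --index.generate-missing-cache-file
+```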
+
+- [#887](https://github.com/improbable-eng/thanos/pull/887) Compact: Added new `--block-sync-concurrency` flag, which allows you to configure the number of goroutines to use when syncing block metadata from object storage.
+- [#928](https://github.com/improbable-eng/thanos/pull/928) Query: Added `--store.response-timeout` flag. If a Store doesn't send any data in this specified duration then the Store will be ignored and partial data will be returned if it's enabled. 0 disables timeout.
+- [#893](https://github.com/improbable-eng/thanos/pull/893) S3 storage backend has graduated to `stable` maturity level.
+- [#936](https://github.com/improbable-eng/thanos/pull/936) Azure storage backend has graduated to `stable` maturity level.
+- [#937](https://github.com/improbable-eng/thanos/pull/937) S3: added trace functionality. You can add `trace.enable: true` to enable the minio client's verbose logging.
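+
+Combined with the new `region` attribute from #1060, an S3 client configuration with tracing enabled might look like this (bucket name, endpoint, and region are placeholders):
+
+```bash
+cat > bucket.yml <<'EOF'
+type: S3
+config:
+  bucket: my-thanos-blocks
+  endpoint: s3.eu-west-1.amazonaws.com
+  region: eu-west-1
+  trace:
+    enable: true
+EOF
+```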
+- [#953](https://github.com/improbable-eng/thanos/pull/953) Compact: now has a hidden flag `--debug.accept-malformed-index`. Compaction index verification will ignore out of order label names.
+- [#963](https://github.com/improbable-eng/thanos/pull/963) GCS: added the possibility to inline the ServiceAccount into the GCS config.
+- [#1010](https://github.com/improbable-eng/thanos/pull/1010) Compact: added new flag `--compact.concurrency`. Number of goroutines to use when compacting groups.
+- [#1028](https://github.com/improbable-eng/thanos/pull/1028) Query: added `--query.default-evaluation-interval`, which sets the default evaluation interval for sub queries.
+- [#980](https://github.com/improbable-eng/thanos/pull/980) Ability to override the Azure storage endpoint for other regions (China).
+- [#1021](https://github.com/improbable-eng/thanos/pull/1021) Query API `series` now supports POST method.
+- [#939](https://github.com/improbable-eng/thanos/pull/939) Query API `query_range` now supports POST method.
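+
+For instance, the series endpoint now also accepts its selectors as a POST body (host, port, and matcher are examples):
+
+```bash
+curl -X POST http://localhost:10902/api/v1/series \
+  --data-urlencode 'match[]=up' \
+  --data-urlencode 'start=2019-05-01T00:00:00Z' \
+  --data-urlencode 'end=2019-05-01T01:00:00Z'
+```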
 
 ### Changed
-- [#970](https://github.com/improbable-eng/thanos/pull/970) Deprecated partial_response_disabled proto field. Added partial_response_strategy instead. Both in gRPC and Query API.
-- [#970](https://github.com/improbable-eng/thanos/pull/970) No `PartialResponseStrategy` field for `RuleGroups` by default means `abort` strategy (old PartialResponse disabled) as this is recommended option for Rules and alerts.
+
+- [#970](https://github.com/improbable-eng/thanos/pull/970) Deprecated `partial_response_disabled` proto field. Added `partial_response_strategy` instead. Both in gRPC and Query API.
+  No `PartialResponseStrategy` field for `RuleGroups` by default means `abort` strategy (old PartialResponse disabled) as this is the recommended option for rules and alerts.
+
+  Metrics:
+
+  * Added `thanos_rule_evaluation_with_warnings_total` to Ruler.
+  * DNS `thanos_ruler_query_apis*` are now `thanos_ruler_query_apis_*` for consistency.
+  * DNS `thanos_querier_store_apis*` are now `thanos_querier_store_apis_*` for consistency.
+  * Query Gate `thanos_bucket_store_series*` are now `thanos_bucket_store_series_*` for consistency.
+  * Most Thanos Ruler metrics related to the rule manager now have a `strategy` label.
+
+  Ruler tracing spans:
+
+  * `/rule_instant_query HTTP[client]` is now `/rule_instant_query_part_resp_abort HTTP[client]` if the request is for the abort strategy.
+
+- [#1009](https://github.com/improbable-eng/thanos/pull/1009): Upgraded Prometheus (~v2.7.0-rc.0 to v2.8.1) and TSDB (`v0.4.0` to `v0.6.1`) deps.
+
+  Changes that affect Thanos:
+  * query:
+    * [ENHANCEMENT] In histogram_quantile merge buckets with equivalent le values. #5158.
+    * [ENHANCEMENT] Show list of offending labels in the error message in many-to-many scenarios. #5189
+    * [BUGFIX] Fix panic when aggregator param is not a literal. #5290
+  * ruler:
+    * [ENHANCEMENT] Reduce time that Alertmanagers are in flux when reloaded. #5126
+    * [BUGFIX] prometheus_rule_group_last_evaluation_timestamp_seconds is now a unix timestamp. #5186
+    * [BUGFIX] prometheus_rule_group_last_duration_seconds now reports seconds instead of nanoseconds. Fixes our [issue #1027](https://github.com/improbable-eng/thanos/issues/1027)
+    * [BUGFIX] Fix sorting of rule groups. #5260
+  * store: [ENHANCEMENT] Fast path for EmptyPostings cases in Merge, Intersect and Without.
+  * tooling: [FEATURE] New dump command to tsdb tool to dump all samples.
+  * compactor: [ENHANCEMENT] When closing the db any running compaction will be cancelled so it doesn't block.
+
+  For the ruler, essentially the whole TSDB CHANGELOG applies between v0.4.0-v0.6.1: https://github.com/prometheus/tsdb/blob/master/CHANGELOG.md
+
+  Note that this was added in TSDB and Prometheus: [FEATURE] Time-overlapping blocks are now allowed. #370
+  However, due to the nature of Thanos compaction (distributed systems), for safety reasons this is disabled for the Thanos compactor for now.
+
+- [#868](https://github.com/improbable-eng/thanos/pull/868) Go has been updated to 1.12.
+- [#1055](https://github.com/improbable-eng/thanos/pull/1055) Gossip flags are now disabled by default and deprecated.
+- [#964](https://github.com/improbable-eng/thanos/pull/964) repair: Repair process now sorts the series and labels within the block.
+- [#1073](https://github.com/improbable-eng/thanos/pull/1073) Store: index cache now calculates its size properly (including the slice header), has an anti-deadlock safeguard, and reports more metrics.
 
 ### Fixed
-- [#921](https://github.com/improbable-eng/thanos/pull/921) `thanos_objstore_bucket_last_successful_upload_time` now does not appear when no blocks have been uploaded so far
+
+- [#921](https://github.com/improbable-eng/thanos/pull/921) `thanos_objstore_bucket_last_successful_upload_time` now does not appear when no blocks have been uploaded so far.
 - [#966](https://github.com/improbable-eng/thanos/pull/966) Bucket: verify no longer warns about overlapping blocks, that overlap `0s`
+- [#848](https://github.com/improbable-eng/thanos/pull/848) Compact: now correctly works with time series with duplicate labels.
+- [#894](https://github.com/improbable-eng/thanos/pull/894) Thanos Rule: UI now correctly shows evaluation time.
+- [#865](https://github.com/improbable-eng/thanos/pull/865) Query: now properly parses DNS SRV Service Discovery.
+- [#889](https://github.com/improbable-eng/thanos/pull/889) Store: added a safeguard against a segfault when merging posting groups.
+- [#941](https://github.com/improbable-eng/thanos/pull/941) Sidecar: added better handling of intermediate restarts.
+- [#933](https://github.com/improbable-eng/thanos/pull/933) Query: Fixed the 30-second lag when adding a new store to query.
+- [#962](https://github.com/improbable-eng/thanos/pull/962) Sidecar: Make config reloader file writes atomic.
+- [#982](https://github.com/improbable-eng/thanos/pull/982) Query: now advertises Min & Max Time according to the nodes.
+- [#1041](https://github.com/improbable-eng/thanos/issues/1038) Ruler is now able to return long time range queries.
+- [#904](https://github.com/improbable-eng/thanos/pull/904) Compact: Skip compaction for blocks with no samples.
+- [#1070](https://github.com/improbable-eng/thanos/pull/1070) Downsampling works again. Deferred closer errors are now properly captured.
 
 ## [v0.3.2](https://github.com/improbable-eng/thanos/releases/tag/v0.3.2) - 2019.03.04
 
 ### Added
+
 - [#851](https://github.com/improbable-eng/thanos/pull/851) New read API endpoint for api/v1/rules and api/v1/alerts.
 - [#873](https://github.com/improbable-eng/thanos/pull/873) Store: fix set index cache LRU
 
@@ -52,12 +142,14 @@ broken so it was unbounded all the time. From this release actual value matters
 the old behaviour (no boundary), use a large enough value.
 
 ### Fixed
+
 - [#833](https://github.com/improbable-eng/thanos/issues/833) Store Gateway matcher regression for intersecting with empty posting.
 - [#867](https://github.com/improbable-eng/thanos/pull/867) Fixed race condition in sidecare between reloader and shipper.
 
 ## [v0.3.1](https://github.com/improbable-eng/thanos/releases/tag/v0.3.1) - 2019.02.18
 
 ### Fixed
+
 - [#829](https://github.com/improbable-eng/thanos/issues/829) Store Gateway crashing due to `slice bounds out of range`.
 - [#834](https://github.com/improbable-eng/thanos/issues/834) Store Gateway matcher regression for `<>` `!=`.
 
@@ -170,7 +262,7 @@ Note lots of necessary breaking changes in flags that relates to bucket configur
 - In `thanos rule`, file based discovery of query nodes using `--query.file-sd-config.files`
 - In `thanos query`, file based discovery of store nodes using `--store.file-sd-config.files`
 - `/-/healthy` endpoint to Querier.
-- DNS service discovery to static and file based configurations using the `dns+` and `dnssrv+` prefixes for the respective lookup. Details [here](/docs/service_discovery.md)
+- DNS service discovery to static and file based configurations using the `dns+` and `dnssrv+` prefixes for the respective lookup. Details [here](docs/service-discovery.md)
 - `--cluster.disable` flag to disable gossip functionality completely.
 - Hidden flag to configure max compaction level.
 - Azure Storage.
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index f25c237bce..03ff306436 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -58,7 +58,4 @@ face temporary or permanent repercussions as determined by other members of the
 
 ## Attribution
 
-This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
-
-[homepage]: http://contributor-covenant.org
-[version]: http://contributor-covenant.org/version/1/4/
\ No newline at end of file
+This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), version 1.4, available at [http://contributor-covenant.org/version/1/4](http://contributor-covenant.org/version/1/4/)
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 6efb35b70f..4769428dd5 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -27,7 +27,7 @@ Adding large new features and components to Thanos should be done by first creat
 
 ## Pull Request Process
 
-1. Read [getting started docs](docs/getting_started.md) and prepare Thanos.
+1. Read [getting started docs](docs/getting-started.md) and prepare Thanos.
 2. Familiarize yourself with [Makefile](Makefile) commands like `format`, `build`, `proto` and `test`.
 3. Fork improbable-eng/thanos.git and start development from your own fork. Here are sample steps to setup your development environment:
 
 ```console
diff --git a/Makefile b/Makefile
index 88b9ebb2ff..7eb4bce5d5 100644
--- a/Makefile
+++ b/Makefile
@@ -2,53 +2,59 @@ PREFIX ?= $(shell pwd)
 DIRECTORIES ?= $(shell find . -path './*' -prune -type d -not -path "./vendor")
 DOCKER_IMAGE_NAME ?= thanos
 DOCKER_IMAGE_TAG ?= $(subst /,-,$(shell git rev-parse --abbrev-ref HEAD))-$(shell date +%Y-%m-%d)-$(shell git rev-parse --short HEAD)
-# $GOPATH/bin might not be in $PATH, so we can't assume `which` would give use
-# the path of promu et al. As for selecting the first GOPATH, we assume:
-# - most people only have one GOPATH at a time;
-# - if you don't have one or any of those tools installed, running `go get`
-#   would place them in the first GOPATH.
-# It's possible that any of the tools would be installed in the other GOPATHs,
-# but for simplicity sake we just make sure they exist in the first one, and
-# then keep using those.
-FIRST_GOPATH ?= $(firstword $(subst :, ,$(shell go env GOPATH)))
+
 TMP_GOPATH ?= /tmp/thanos-go
-BIN_DIR ?= $(FIRST_GOPATH)/bin
+GOBIN ?= ${GOPATH}/bin
 GO111MODULE ?= on
 export GO111MODULE
 
 # Tools.
-EMBEDMD ?= $(BIN_DIR)/embedmd-$(EMBEDMD_VERSION)
+EMBEDMD ?= $(GOBIN)/embedmd-$(EMBEDMD_VERSION)
 # v2.0.0
 EMBEDMD_VERSION ?= 97c13d6e41602fc6e397eb51c45f38069371a969
-ERRCHECK ?= $(BIN_DIR)/errcheck-$(ERRCHECK_VERSION)
+ERRCHECK ?= $(GOBIN)/errcheck-$(ERRCHECK_VERSION)
 # v1.2.0
 ERRCHECK_VERSION ?= e14f8d59a22d460d56c5ee92507cd94c78fbf274
-LICHE ?= $(BIN_DIR)/liche-$(LICHE_VERSION)
+LICHE ?= $(GOBIN)/liche-$(LICHE_VERSION)
 LICHE_VERSION ?= 2a2e6e56f6c615c17b2e116669c4cdb31b5453f3
-GOIMPORTS ?= $(BIN_DIR)/goimports-$(GOIMPORTS_VERSION)
+GOIMPORTS ?= $(GOBIN)/goimports-$(GOIMPORTS_VERSION)
 GOIMPORTS_VERSION ?= 1c3d964395ce8f04f3b03b30aaed0b096c08c3c6
-PROMU ?= $(BIN_DIR)/promu-$(PROMU_VERSION)
+PROMU ?= $(GOBIN)/promu-$(PROMU_VERSION)
 # v0.2.0
 PROMU_VERSION ?= 264dc36af9ea3103255063497636bd5713e3e9c1
-PROTOC ?= $(BIN_DIR)/protoc-$(PROTOC_VERSION)
+PROTOC ?= $(GOBIN)/protoc-$(PROTOC_VERSION)
 PROTOC_VERSION ?= 3.4.0
+# v0.55.3 This needs to match with version in netlify.toml
+HUGO_VERSION ?= 993b84333cd75faa224d02618f312a0e96b53372
+HUGO ?= $(GOBIN)/hugo-$(HUGO_VERSION)
+# v3.1.1
+GOBINDATA_VERSION ?= a9c83481b38ebb1c4eb8f0168fd4b10ca1d3c523
+GOBINDATA ?= $(GOBIN)/go-bindata-$(GOBINDATA_VERSION)
 GIT ?= $(shell which git)
 BZR ?= $(shell which bzr)
 
+WEB_DIR ?= website
+WEBSITE_BASE_URL ?= https://thanos.io
+PUBLIC_DIR ?= $(WEB_DIR)/public
+ME ?= $(shell whoami)
+
 # E2e test deps.
 # Referenced by github.com/improbable-eng/thanos/blob/master/docs/getting_started.md#prometheus
-# Limitied prom version, because testing was not possibe. This should fix it: https://github.com/improbable-eng/thanos/issues/758
-PROM_VERSIONS ?=v2.4.3 v2.5.0
+# Limited prom version, because testing was not possible. This should fix it: https://github.com/improbable-eng/thanos/issues/758
+PROM_VERSIONS ?=v2.4.3 v2.5.0 v2.8.1
+
 ALERTMANAGER_VERSION ?=v0.15.2
 MINIO_SERVER_VERSION ?=RELEASE.2018-10-06T00-15-16Z
 
-# fetch_go_bin_version downloads (go gets) the binary from specific version and installs it in $(BIN_DIR)/<bin>-<version>
+# fetch_go_bin_version downloads (go gets) the binary from specific version and installs it in $(GOBIN)/<bin>-<version>
 # arguments:
 # $(1): Install path. (e.g github.com/campoy/embedmd)
 # $(2): Tag or revision for checkout.
+# TODO(bwplotka): Move to just using modules, however make sure to not use or edit Thanos go.mod file!
 define fetch_go_bin_version
-	@mkdir -p $(BIN_DIR)
+	@mkdir -p $(GOBIN)
+	@mkdir -p $(TMP_GOPATH)
 
 	@echo ">> fetching $(1)@$(2) revision/version"
 	@if [ ! -d '$(TMP_GOPATH)/src/$(1)' ]; then \
@@ -59,8 +65,27 @@ define fetch_go_bin_version
 	@CDPATH='' cd -- '$(TMP_GOPATH)/src/$(1)' && git checkout -f -q '$(2)'
 	@echo ">> installing $(1)@$(2)"
 	@GOBIN='$(TMP_GOPATH)/bin' GOPATH='$(TMP_GOPATH)' GO111MODULE='off' go install '$(1)'
-	@mv -- '$(TMP_GOPATH)/bin/$(shell basename $(1))' '$(BIN_DIR)/$(shell basename $(1))-$(2)'
-	@echo ">> produced $(BIN_DIR)/$(shell basename $(1))-$(2)"
+	@mv -- '$(TMP_GOPATH)/bin/$(shell basename $(1))' '$(GOBIN)/$(shell basename $(1))-$(2)'
+	@echo ">> produced $(GOBIN)/$(shell basename $(1))-$(2)"
+
+endef
+
+define require_clean_work_tree
+	@git update-index -q --ignore-submodules --refresh
+
+	@if ! git diff-files --quiet --ignore-submodules --; then \
+		echo >&2 "cannot $1: you have unstaged changes."; \
+		git diff-files --name-status -r --ignore-submodules -- >&2; \
+		echo >&2 "Please commit or stash them."; \
+		exit 1; \
+	fi
+
+	@if ! git diff-index --cached --quiet HEAD --ignore-submodules --; then \
+		echo >&2 "cannot $1: your index contains uncommitted changes."; \
+		git diff-index --cached --name-status -r --ignore-submodules HEAD -- >&2; \
+		echo >&2 "Please commit or stash them."; \
+		exit 1; \
+	fi
 endef
 
@@ -69,24 +94,23 @@ all: format build
 
 # assets repacks all statis assets into go file for easier deploy.
 .PHONY: assets
-assets:
+assets: $(GOBINDATA)
	@echo ">> deleting asset file"
	@rm pkg/ui/bindata.go || true
	@echo ">> writing assets"
-	@go get -u github.com/jteeuwen/go-bindata/...
-	@go-bindata $(bindata_flags) -pkg ui -o pkg/ui/bindata.go -ignore '(.*\.map|bootstrap\.js|bootstrap-theme\.css|bootstrap\.css)' pkg/ui/templates/... pkg/ui/static/...
+	@$(GOBINDATA) $(bindata_flags) -pkg ui -o pkg/ui/bindata.go -ignore '(.*\.map|bootstrap\.js|bootstrap-theme\.css|bootstrap\.css)' pkg/ui/templates/... pkg/ui/static/...
	@go fmt ./pkg/ui
 
 # build builds Thanos binary using `promu`.
 .PHONY: build
 build: check-git check-bzr go-mod-tidy $(PROMU)
-	@echo ">> building binaries"
+	@echo ">> building binaries $(GOBIN)"
	@$(PROMU) build --prefix $(PREFIX)
 
 # crossbuild builds all binaries for all platforms.
 .PHONY: crossbuild
-crossbuild: check-git check-bzr go-mod-tidy $(PROMU)
+crossbuild: $(PROMU)
	@echo ">> crossbuilding all binaries"
	$(PROMU) crossbuild -v
 
@@ -119,6 +143,7 @@ docs: $(EMBEDMD) build
 check-docs: $(EMBEDMD) $(LICHE) build
	@EMBEDMD_BIN="$(EMBEDMD)" scripts/genflagdocs.sh check
	@$(LICHE) --recursive docs --exclude "cloud.tencent.com" --document-root .
+	@$(LICHE) --exclude "cloud.tencent.com" --document-root . *.md
 
 # errcheck performs static analysis and returns error if any of the errors is not checked.
 .PHONY: errcheck
@@ -147,7 +172,7 @@ promu: $(PROMU)
 .PHONY: tarball
 tarball: $(PROMU)
	@echo ">> building release tarball"
-	$(PROMU) tarball --prefix $(PREFIX) $(BIN_DIR)
+	$(PROMU) tarball --prefix $(PREFIX) $(GOBIN)
 
 .PHONY: tarballs-release
 tarballs-release: $(PROMU)
@@ -203,6 +228,22 @@ else
	@echo >&2 "No bzr binary found."; exit 1
 endif
 
+.PHONY: web-pre-process
+web-pre-process:
+	@echo ">> running documentation website pre processing"
+	@bash scripts/websitepreprocess.sh
+
+.PHONY: web
+web: web-pre-process $(HUGO)
+	@echo ">> building documentation website"
+	# TODO(bwplotka): Make it --gc
+	@cd $(WEB_DIR) && HUGO_ENV=production $(HUGO) --config hugo.yaml --minify -v -b $(WEBSITE_BASE_URL)
+
+.PHONY: web-serve
+web-serve: web-pre-process $(HUGO)
+	@echo ">> serving documentation website"
+	@cd $(WEB_DIR) && $(HUGO) --config hugo.yaml -v server
+
 # non-phony targets
 $(EMBEDMD):
	$(call fetch_go_bin_version,github.com/campoy/embedmd,$(EMBEDMD_VERSION))
@@ -219,10 +260,18 @@ $(LICHE):
 
 $(PROMU):
	$(call fetch_go_bin_version,github.com/prometheus/promu,$(PROMU_VERSION))
 
+$(HUGO):
+	@go get github.com/gohugoio/hugo@$(HUGO_VERSION)
+	@mv $(GOBIN)/hugo $(HUGO)
+	@go mod tidy
+
+$(GOBINDATA):
+	$(call fetch_go_bin_version,github.com/go-bindata/go-bindata/go-bindata,$(GOBINDATA_VERSION))
+
 $(PROTOC):
	@mkdir -p $(TMP_GOPATH)
	@echo ">> fetching protoc@${PROTOC_VERSION}"
	@PROTOC_VERSION="$(PROTOC_VERSION)" TMP_GOPATH="$(TMP_GOPATH)" scripts/installprotoc.sh
	@echo ">> installing protoc@${PROTOC_VERSION}"
-	@mv -- "$(TMP_GOPATH)/bin/protoc" "$(BIN_DIR)/protoc-$(PROTOC_VERSION)"
-	@echo ">> produced $(BIN_DIR)/protoc-$(PROTOC_VERSION)"
+	@mv -- "$(TMP_GOPATH)/bin/protoc" "$(GOBIN)/protoc-$(PROTOC_VERSION)"
+	@echo ">> produced $(GOBIN)/protoc-$(PROTOC_VERSION)"
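The new website targets can be exercised locally; as a rough sketch (the pinned Hugo is fetched on first use via the `$(HUGO)` rule above):

```bash
# Build the documentation website into website/public/.
make web
# Or iterate locally with Hugo's dev server.
make web-serve
```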
diff --git a/README.md b/README.md
index a734ba329c..b9eed459b3 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,7 @@
 [![GoDoc](https://godoc.org/github.com/improbable-eng/thanos?status.svg)](https://godoc.org/github.com/improbable-eng/thanos)
 [![Slack](https://img.shields.io/badge/join%20slack-%23thanos-brightgreen.svg)](https://join.slack.com/t/improbable-eng/shared_invite/enQtMzQ1ODcyMzQ5MjM4LWY5ZWZmNGM2ODc5MmViNmQ3ZTA3ZTY3NzQwOTBlMTkzZmIxZTIxODk0OWU3YjZhNWVlNDU3MDlkZGViZjhkMjc)
 [![Docker Pulls](https://img.shields.io/docker/pulls/improbable/thanos.svg?maxAge=604800)](https://hub.docker.com/r/improbable/thanos/)
+[![Netlify Status](https://api.netlify.com/api/v1/badges/664a5091-934c-4b0e-a7b6-bc12f822a590/deploy-status)](https://app.netlify.com/sites/thanos-io/deploys)
 
 ## Overview
 
@@ -29,7 +30,7 @@ Concretely the aims of the project are:
 
 ## Getting Started
 
-* **[Getting Started](docs/getting_started.md)**
+* **[Getting Started](docs/getting-started.md)**
 * [Design](docs/design.md)
 * [Prom Meetup Slides](https://www.slideshare.net/BartomiejPotka/thanos-global-durable-prometheus-monitoring)
 * [Introduction blog post](https://improbable.io/games/blog/thanos-prometheus-at-scale)
@@ -59,14 +60,27 @@ The philosophy of Thanos and our community is borrowing much from UNIX philosoph
 * Make it easy to read, write, and, run components
   * e.g. reduce complexity in system design and implementation
 
+## Releases
+
+Master should be stable and usable. Every commit to master builds a docker image named `master-<date>-<sha>`.
+
+We also perform minor releases every 6 weeks.
+During that, we build tarballs for major platforms and docker images.
+
+See [this](docs/release-process.md) for details.
+
 ## Contributing
 
 Contributions are very welcome! See our [CONTRIBUTING.md](CONTRIBUTING.md) for more information.
 
 ## Community
 
-Thanos is an open source project and we welcome new contributers and members
+Thanos is an open source project and we value and welcome new contributors and members
 of the community. Here are ways to get in touch with the community:
 
 * Slack: [#thanos](https://join.slack.com/t/improbable-eng/shared_invite/enQtMzQ1ODcyMzQ5MjM4LWY5ZWZmNGM2ODc5MmViNmQ3ZTA3ZTY3NzQwOTBlMTkzZmIxZTIxODk0OWU3YjZhNWVlNDU3MDlkZGViZjhkMjc)
 * Issue Tracker: [GitHub Issues](https://github.com/improbable-eng/thanos/issues)
+
+## Maintainers
+
+See [MAINTAINERS.md](MAINTAINERS.md)
\ No newline at end of file
diff --git a/VERSION b/VERSION
index 1e4337edbf..1d0ba9ea18 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.3.2-master
\ No newline at end of file
+0.4.0
diff --git a/cmd/thanos/bucket.go b/cmd/thanos/bucket.go
index e7a1c22040..2da91cd5e7 100644
--- a/cmd/thanos/bucket.go
+++ b/cmd/thanos/bucket.go
@@ -85,10 +85,11 @@ func registerBucketVerify(m map[string]setupFunc, root *kingpin.CmdClause, name
		var backupBkt objstore.Bucket
		if len(backupconfContentYaml) == 0 {
			if *repair {
-				return errors.Wrap(err, "repair is specified, so backup client is required")
+				return errors.New("repair is specified, so backup client is required")
			}
		} else {
-			backupBkt, err = client.NewBucket(logger, backupconfContentYaml, reg, name)
+			// nil Prometheus registerer: don't create conflicting metrics
+			backupBkt, err = client.NewBucket(logger, backupconfContentYaml, nil, name)
			if err != nil {
				return err
			}
diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go
index c95cf113d6..365d211992 100644
--- a/cmd/thanos/compact.go
+++ b/cmd/thanos/compact.go
@@ -2,17 +2,22 @@ package main
 
 import (
	"context"
+	"encoding/json"
	"fmt"
	"os"
	"path"
+	"path/filepath"
	"strconv"
	"strings"
	"time"
 
	"github.com/go-kit/kit/log"
	"github.com/go-kit/kit/log/level"
+	"github.com/improbable-eng/thanos/pkg/block"
+	"github.com/improbable-eng/thanos/pkg/block/metadata"
	"github.com/improbable-eng/thanos/pkg/compact"
	"github.com/improbable-eng/thanos/pkg/compact/downsample"
+	"github.com/improbable-eng/thanos/pkg/objstore"
	"github.com/improbable-eng/thanos/pkg/objstore/client"
	"github.com/improbable-eng/thanos/pkg/runutil"
	"github.com/oklog/run"
@@ -77,7 +82,7 @@ func registerCompact(m map[string]setupFunc, app *kingpin.Application, name stri
 
	objStoreConfig := regCommonObjStoreFlags(cmd, "", true)
 
-	syncDelay := modelDuration(cmd.Flag("sync-delay", "Minimum age of fresh (non-compacted) blocks before they are being processed.").
+	consistencyDelay := modelDuration(cmd.Flag("consistency-delay", fmt.Sprintf("Minimum age of fresh (non-compacted) blocks before they are being processed. Malformed blocks older than the maximum of consistency-delay and %s will be removed.", compact.MinimumAgeForRemoval)).
		Default("30m"))
 
	retentionRaw := modelDuration(cmd.Flag("retention.resolution-raw", "How long to retain raw samples in bucket. 0d - disables this retention").Default("0d"))
@@ -87,6 +92,9 @@ func registerCompact(m map[string]setupFunc, app *kingpin.Application, name stri
	wait := cmd.Flag("wait", "Do not exit after all compactions have been processed and wait for new work.").
		Short('w').Bool()
 
+	generateMissingIndexCacheFiles := cmd.Flag("index.generate-missing-cache-file", "If enabled, on startup compactor runs a one-off job that scans all the blocks to find all blocks with missing index cache file. It generates those if needed and uploads them.").
+		Hidden().Default("false").Bool()
+
	// TODO(bplotka): Remove this flag once https://github.com/improbable-eng/thanos/issues/297 is fixed.
	disableDownsampling := cmd.Flag("debug.disable-downsampling", "Disables downsampling. This is not recommended "+
		"as querying long time ranges without non-downsampled data is not efficient and not useful (is not possible to render all for human eye).").
@@ -98,15 +106,19 @@ func registerCompact(m map[string]setupFunc, app *kingpin.Application, name stri
	blockSyncConcurrency := cmd.Flag("block-sync-concurrency", "Number of goroutines to use when syncing block metadata from object storage.").
		Default("20").Int()
 
+	compactionConcurrency := cmd.Flag("compact.concurrency", "Number of goroutines to use when compacting groups.").
+		Default("1").Int()
+
	m[name] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ bool) error {
		return runCompact(g, logger, reg,
			*httpAddr,
			*dataDir,
			objStoreConfig,
-			time.Duration(*syncDelay),
+			time.Duration(*consistencyDelay),
			*haltOnError,
			*acceptMalformedIndex,
			*wait,
+			*generateMissingIndexCacheFiles,
			map[compact.ResolutionLevel]time.Duration{
				compact.ResolutionLevelRaw: time.Duration(*retentionRaw),
				compact.ResolutionLevel5m:  time.Duration(*retention5m),
@@ -116,6 +128,7 @@ func registerCompact(m map[string]setupFunc, app *kingpin.Application, name stri
			*disableDownsampling,
			*maxCompactionLevel,
			*blockSyncConcurrency,
+			*compactionConcurrency,
		)
	}
 }
@@ -127,15 +140,17 @@ func runCompact(
	httpBindAddr string,
	dataDir string,
	objStoreConfig *pathOrContent,
-	syncDelay time.Duration,
+	consistencyDelay time.Duration,
	haltOnError bool,
	acceptMalformedIndex bool,
	wait bool,
+	generateMissingIndexCacheFiles bool,
	retentionByResolution map[compact.ResolutionLevel]time.Duration,
	component string,
	disableDownsampling bool,
	maxCompactionLevel int,
	blockSyncConcurrency int,
+	concurrency int,
 ) error {
	halted := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "thanos_compactor_halted",
@@ -167,7 +182,7 @@ func runCompact(
		}
	}()
 
-	sy, err := compact.NewSyncer(logger, reg, bkt, syncDelay,
+	sy, err := compact.NewSyncer(logger, reg, bkt, consistencyDelay,
		blockSyncConcurrency, acceptMalformedIndex)
	if err != nil {
		return errors.Wrap(err, "create syncer")
@@ -182,9 +197,11 @@ func runCompact(
		level.Warn(logger).Log("msg", "Max compaction level is lower than should be", "current", maxCompactionLevel, "default", compactions.maxLevel())
	}
 
+	ctx, cancel := context.WithCancel(context.Background())
+
	// Instantiate the compactor with different time slices. Timestamps in TSDB
	// are in milliseconds.
-	comp, err := tsdb.NewLeveledCompactor(reg, logger, levels, downsample.NewPool())
+	comp, err := tsdb.NewLeveledCompactor(ctx, reg, logger, levels, downsample.NewPool())
	if err != nil {
		return errors.Wrap(err, "create compactor")
	}
@@ -192,13 +209,17 @@ func runCompact(
	var (
		compactDir      = path.Join(dataDir, "compact")
		downsamplingDir = path.Join(dataDir, "downsample")
+		indexCacheDir   = path.Join(dataDir, "index_cache")
	)
 
	if err := os.RemoveAll(downsamplingDir); err != nil {
		return errors.Wrap(err, "clean working downsample directory")
	}
 
-	compactor := compact.NewBucketCompactor(logger, sy, comp, compactDir, bkt)
+	compactor, err := compact.NewBucketCompactor(logger, sy, comp, compactDir, bkt, concurrency)
+	if err != nil {
+		return errors.Wrap(err, "create bucket compactor")
+	}
 
	if retentionByResolution[compact.ResolutionLevelRaw].Seconds() != 0 {
		level.Info(logger).Log("msg", "retention policy of raw samples is enabled", "duration", retentionByResolution[compact.ResolutionLevelRaw])
@@ -210,7 +231,6 @@ func runCompact(
		level.Info(logger).Log("msg", "retention policy of 1 hour aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel1h])
	}
 
-	ctx, cancel := context.WithCancel(context.Background())
	f := func() error {
		if err := compactor.Compact(ctx); err != nil {
			return errors.Wrap(err, "compaction failed")
@@ -247,6 +267,13 @@ func runCompact(
	g.Add(func() error {
		defer runutil.CloseWithLogOnErr(logger, bkt, "bucket client")
 
+		// Generate index file.
+		if generateMissingIndexCacheFiles {
+			if err := genMissingIndexCacheFiles(ctx, logger, bkt, indexCacheDir); err != nil {
+				return err
+			}
+		}
+
		if !wait {
			return f()
		}
@@ -292,3 +319,118 @@ func runCompact(
	level.Info(logger).Log("msg", "starting compact node")
	return nil
 }
+
+// genMissingIndexCacheFiles scans over all blocks, generates missing index cache files and uploads them to object storage.
+func genMissingIndexCacheFiles(ctx context.Context, logger log.Logger, bkt objstore.Bucket, dir string) error {
+	if err := os.RemoveAll(dir); err != nil {
+		return errors.Wrap(err, "clean index cache directory")
+	}
+	if err := os.MkdirAll(dir, 0777); err != nil {
+		return errors.Wrap(err, "create dir")
+	}
+
+	defer func() {
+		if err := os.RemoveAll(dir); err != nil {
+			level.Error(logger).Log("msg", "failed to remove index cache directory", "path", dir, "err", err)
+		}
+	}()
+
+	level.Info(logger).Log("msg", "start index cache processing")
+
+	var metas []*metadata.Meta
+
+	if err := bkt.Iter(ctx, "", func(name string) error {
+		id, ok := block.IsBlockDir(name)
+		if !ok {
+			return nil
+		}
+
+		rc, err := bkt.Get(ctx, path.Join(id.String(), block.MetaFilename))
+		if err != nil {
+			// Probably not finished block, skip it.
+			if bkt.IsObjNotFoundErr(err) {
+				level.Warn(logger).Log("msg", "meta file wasn't found", "block", id.String())
+				return nil
+			}
+			return errors.Wrapf(err, "get meta for block %s", id)
+		}
+		defer runutil.CloseWithLogOnErr(logger, rc, "block reader")
+
+		var meta metadata.Meta
+		if err := json.NewDecoder(rc).Decode(&meta); err != nil {
+			return errors.Wrap(err, "decode meta")
+		}
+
+		// New version of compactor pushes index cache along with data block.
+		// Skip uncompacted blocks.
+		if meta.Compaction.Level == 1 {
+			return nil
+		}
+
+		metas = append(metas, &meta)
+
+		return nil
+	}); err != nil {
+		return errors.Wrap(err, "retrieve bucket block metas")
+	}
+
+	for _, meta := range metas {
+		if err := generateIndexCacheFile(ctx, bkt, logger, dir, meta); err != nil {
+			return err
+		}
+	}
+
+	level.Info(logger).Log("msg", "generating index cache files is done, you can remove startup argument `index.generate-missing-cache-file`")
+	return nil
+}
+
+func generateIndexCacheFile(
+	ctx context.Context,
+	bkt objstore.Bucket,
+	logger log.Logger,
+	indexCacheDir string,
+	meta *metadata.Meta,
+) error {
+	id := meta.ULID
+
+	bdir := filepath.Join(indexCacheDir, id.String())
+	if err := os.MkdirAll(bdir, 0777); err != nil {
+		return errors.Wrap(err, "create block dir")
+	}
+
+	defer func() {
+		if err := os.RemoveAll(bdir); err != nil {
+			level.Error(logger).Log("msg", "failed to remove index cache directory", "path", bdir, "err", err)
+		}
+	}()
+
+	cachePath := filepath.Join(bdir, block.IndexCacheFilename)
+	cache := path.Join(meta.ULID.String(), block.IndexCacheFilename)
+
+	ok, err := objstore.Exists(ctx, bkt, cache)
+	if ok {
+		return nil
+	}
+	if err != nil {
+		return errors.Wrapf(err, "attempt to check if a cached index file exists")
+	}
+
+	level.Debug(logger).Log("msg", "make index cache", "block", id)
+
+	// Try to download index file from obj store.
+	indexPath := filepath.Join(bdir, block.IndexFilename)
+	index := path.Join(id.String(), block.IndexFilename)
+
+	if err := objstore.DownloadFile(ctx, logger, bkt, index, indexPath); err != nil {
+		return errors.Wrap(err, "download index file")
+	}
+
+	if err := block.WriteIndexCache(logger, indexPath, cachePath); err != nil {
+		return errors.Wrap(err, "write index cache")
+	}
+
+	if err := objstore.UploadFile(ctx, logger, bkt, cachePath, cache); err != nil {
+		return errors.Wrap(err, "upload index cache")
+	}
+	return nil
+}
diff --git a/cmd/thanos/flags.go b/cmd/thanos/flags.go
index db370ed97c..1736cf518d 100644
--- a/cmd/thanos/flags.go
+++ b/cmd/thanos/flags.go
@@ -46,36 +46,36 @@ func regCommonServerFlags(cmd *kingpin.CmdClause) (
	httpBindAddr = regHTTPAddrFlag(cmd)
	grpcBindAddr, grpcTLSSrvCert, grpcTLSSrvKey, grpcTLSSrvClientCA = regGRPCFlags(cmd)
 
-	grpcAdvertiseAddr := cmd.Flag("grpc-advertise-address", "Explicit (external) host:port address to advertise for gRPC StoreAPI in gossip cluster. If empty, 'grpc-address' will be used.").
+	grpcAdvertiseAddr := cmd.Flag("grpc-advertise-address", "Deprecated(gossip will be removed from v0.5.0): Explicit (external) host:port address to advertise for gRPC StoreAPI in gossip cluster. If empty, 'grpc-address' will be used.").
		String()
 
-	clusterBindAddr := cmd.Flag("cluster.address", "Listen ip:port address for gossip cluster.").
+	clusterBindAddr := cmd.Flag("cluster.address", "Deprecated(gossip will be removed from v0.5.0): Listen ip:port address for gossip cluster.").
		Default("0.0.0.0:10900").String()
 
-	clusterAdvertiseAddr := cmd.Flag("cluster.advertise-address", "Explicit (external) ip:port address to advertise for gossip in gossip cluster. Used internally for membership only.").
+	clusterAdvertiseAddr := cmd.Flag("cluster.advertise-address", "Deprecated(gossip will be removed from v0.5.0): Explicit (external) ip:port address to advertise for gossip in gossip cluster. Used internally for membership only.").
		String()
 
-	peers := cmd.Flag("cluster.peers", "Initial peers to join the cluster. It can be either <ip:port>, or <domain:port>. A lookup resolution is done only at the startup.").Strings()
+	peers := cmd.Flag("cluster.peers", "Deprecated(gossip will be removed from v0.5.0): Initial peers to join the cluster. It can be either <ip:port>, or <domain:port>. A lookup resolution is done only at the startup.").Strings()
 
-	gossipInterval := modelDuration(cmd.Flag("cluster.gossip-interval", "Interval between sending gossip messages. By lowering this value (more frequent) gossip messages are propagated across the cluster more quickly at the expense of increased bandwidth. Default is used from a specified network-type.").
+	gossipInterval := modelDuration(cmd.Flag("cluster.gossip-interval", "Deprecated(gossip will be removed from v0.5.0): Interval between sending gossip messages. By lowering this value (more frequent) gossip messages are propagated across the cluster more quickly at the expense of increased bandwidth. Default is used from a specified network-type.").
		PlaceHolder("<gossip interval>"))
 
-	pushPullInterval := modelDuration(cmd.Flag("cluster.pushpull-interval", "Interval for gossip state syncs. Setting this interval lower (more frequent) will increase convergence speeds across larger clusters at the expense of increased bandwidth usage. Default is used from a specified network-type.").
+	pushPullInterval := modelDuration(cmd.Flag("cluster.pushpull-interval", "Deprecated(gossip will be removed from v0.5.0): Interval for gossip state syncs. Setting this interval lower (more frequent) will increase convergence speeds across larger clusters at the expense of increased bandwidth usage. Default is used from a specified network-type.").
		PlaceHolder("<push-pull interval>"))
 
-	refreshInterval := modelDuration(cmd.Flag("cluster.refresh-interval", "Interval for membership to refresh cluster.peers state, 0 disables refresh.").Default(cluster.DefaultRefreshInterval.String()))
+	refreshInterval := modelDuration(cmd.Flag("cluster.refresh-interval", "Deprecated(gossip will be removed from v0.5.0): Interval for membership to refresh cluster.peers state, 0 disables refresh.").Default(cluster.DefaultRefreshInterval.String()))
 
-	secretKey := cmd.Flag("cluster.secret-key", "Initial secret key to encrypt cluster gossip. Can be one of AES-128, AES-192, or AES-256 in hexadecimal format.").HexBytes()
+	secretKey := cmd.Flag("cluster.secret-key", "Deprecated(gossip will be removed from v0.5.0): Initial secret key to encrypt cluster gossip. Can be one of AES-128, AES-192, or AES-256 in hexadecimal format.").HexBytes()
 
	networkType := cmd.Flag("cluster.network-type",
-		fmt.Sprintf("Network type with predefined peers configurations. Sets of configurations accounting the latency differences between network types: %s.",
+		fmt.Sprintf("Deprecated(gossip will be removed from v0.5.0): Network type with predefined peers configurations. Sets of configurations accounting the latency differences between network types: %s.",
			strings.Join(cluster.NetworkPeerTypes, ", "),
		),
	).
		Default(cluster.LanNetworkPeerType).
		Enum(cluster.NetworkPeerTypes...)
 
-	gossipDisabled := cmd.Flag("cluster.disable", "If true gossip will be disabled and no cluster related server will be started.").Default("false").Bool()
+	gossipDisabled := cmd.Flag("cluster.disable", "Deprecated(gossip will be removed from v0.5.0): If true gossip will be disabled and no cluster related server will be started.").Default("true").Bool()
 
	return grpcBindAddr,
		httpBindAddr,
diff --git a/cmd/thanos/query.go b/cmd/thanos/query.go
index fd360e60af..1e229bc789 100644
--- a/cmd/thanos/query.go
+++ b/cmd/thanos/query.go
@@ -82,15 +82,23 @@ func registerQuery(m map[string]setupFunc, app *kingpin.Application, name string
	fileSDInterval := modelDuration(cmd.Flag("store.sd-interval", "Refresh interval to re-read file SD files. It is used as a resync fallback.").
		Default("5m"))
 
+	// TODO(bwplotka): Grab this from TTL at some point.
	dnsSDInterval := modelDuration(cmd.Flag("store.sd-dns-interval", "Interval between DNS resolutions.").
		Default("30s"))
 
-	enableAutodownsampling := cmd.Flag("query.auto-downsampling", "Enable automatic adjustment (step / 5) to what source of data should be used in store gateways if no max_source_resolution param is specified. ").
+	dnsSDResolver := cmd.Flag("store.sd-dns-resolver", fmt.Sprintf("Resolver to use. Possible options: [%s, %s]", dns.GolangResolverType, dns.MiekgdnsResolverType)).
+		Default(string(dns.GolangResolverType)).Hidden().String()
+
+	unhealthyStoreTimeout := modelDuration(cmd.Flag("store.unhealthy-timeout", "Timeout before an unhealthy store is cleaned from the store UI page.").Default("5m"))
+
+	enableAutodownsampling := cmd.Flag("query.auto-downsampling", "Enable automatic adjustment (step / 5) to what source of data should be used in store gateways if no max_source_resolution param is specified.").
		Default("false").Bool()
 
	enablePartialResponse := cmd.Flag("query.partial-response", "Enable partial response for queries if no partial_response param is specified.").
		Default("true").Bool()
 
+	defaultEvaluationInterval := modelDuration(cmd.Flag("query.default-evaluation-interval", "Set default evaluation interval for sub queries.").Default("1m"))
+
	storeResponseTimeout := modelDuration(cmd.Flag("store.response-timeout", "If a Store doesn't send any data in this specified duration then a Store will be ignored and partial data will be returned if it's enabled. 0 disables timeout.").Default("0ms"))
 
	m[name] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ bool) error {
@@ -121,6 +129,8 @@ func registerQuery(m map[string]setupFunc, app *kingpin.Application, name string
			fileSD = file.NewDiscovery(conf, logger)
		}
 
+		promql.SetDefaultEvaluationInterval(time.Duration(*defaultEvaluationInterval))
+
		return runQuery(
			g,
			logger,
@@ -150,6 +160,8 @@ func registerQuery(m map[string]setupFunc, app *kingpin.Application, name string
			*enablePartialResponse,
			fileSD,
			time.Duration(*dnsSDInterval),
+			*dnsSDResolver,
+			time.Duration(*unhealthyStoreTimeout),
		)
	}
 }
@@ -266,6 +278,8 @@ func runQuery(
	enablePartialResponse bool,
	fileSD *file.Discovery,
	dnsSDInterval time.Duration,
+	dnsSDResolver string,
+	unhealthyStoreTimeout time.Duration,
 ) error {
	// TODO(bplotka in PR #513 review): Move arguments into struct.
	duplicatedStores := prometheus.NewCounter(prometheus.CounterOpts{
@@ -283,6 +297,7 @@ func runQuery(
	dnsProvider := dns.NewProvider(
		logger,
		extprom.WrapRegistererWithPrefix("thanos_querier_store_apis_", reg),
+		dns.ResolverType(dnsSDResolver),
	)
 
	var (
@@ -310,6 +325,7 @@ func runQuery(
				return specs
			},
			dialOpts,
+			unhealthyStoreTimeout,
		)
		proxy            = store.NewProxyStore(logger, stores.Get, component.Query, selectorLset, storeResponseTimeout)
		queryableCreator = query.NewQueryableCreator(logger, proxy, replicaLabel)
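Taken together, the new Querier flags registered above might be combined as follows (the store address and values are illustrative):

```bash
thanos query \
  --store=dns+thanos-store.monitoring.svc:10901 \
  --store.unhealthy-timeout=5m \
  --store.response-timeout=2s \
  --query.default-evaluation-interval=1m
```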
diff --git a/cmd/thanos/receive.go b/cmd/thanos/receive.go
index 63a3ba9c36..c12f0472e3 100644
--- a/cmd/thanos/receive.go
+++ b/cmd/thanos/receive.go
@@ -9,19 +9,23 @@ import (
 
	"github.com/go-kit/kit/log"
	"github.com/go-kit/kit/log/level"
+	"github.com/improbable-eng/thanos/pkg/block/metadata"
	"github.com/improbable-eng/thanos/pkg/component"
+	"github.com/improbable-eng/thanos/pkg/objstore/client"
	"github.com/improbable-eng/thanos/pkg/receive"
	"github.com/improbable-eng/thanos/pkg/runutil"
+	"github.com/improbable-eng/thanos/pkg/shipper"
	"github.com/improbable-eng/thanos/pkg/store"
	"github.com/improbable-eng/thanos/pkg/store/storepb"
	"github.com/oklog/run"
-	opentracing "github.com/opentracing/opentracing-go"
+	"github.com/opentracing/opentracing-go"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/storage/tsdb"
+	"github.com/prometheus/tsdb/labels"
	"google.golang.org/grpc"
-	kingpin "gopkg.in/alecthomas/kingpin.v2"
+	"gopkg.in/alecthomas/kingpin.v2"
 )
 
 func registerReceive(m map[string]setupFunc, app *kingpin.Application, name string) {
@@ -36,7 +40,18 @@ func registerReceive(m map[string]setupFunc, app *kingpin.Application, name stri
	dataDir := cmd.Flag("tsdb.path", "Data directory of TSDB.").
		Default("./data").String()
 
+	labelStrs := cmd.Flag("labels", "External labels to announce. This flag will be removed in the future when handling multiple tsdb instances is added.").PlaceHolder("key=\"value\"").Strings()
+
+	objStoreConfig := regCommonObjStoreFlags(cmd, "", false)
+
+	retention := modelDuration(cmd.Flag("tsdb.retention", "How long to retain raw samples on local storage. 0d - disables this retention").Default("15d"))
+
	m[name] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ bool) error {
+		lset, err := parseFlagLabels(*labelStrs)
+		if err != nil {
+			return errors.Wrap(err, "parse labels")
+		}
+
		return runReceive(
			g,
			logger,
@@ -49,6 +64,9 @@ func registerReceive(m map[string]setupFunc, app *kingpin.Application, name stri
			*httpMetricsBindAddr,
			*remoteWriteAddress,
			*dataDir,
+			objStoreConfig,
+			lset,
+			*retention,
		)
	}
 }
@@ -65,15 +83,18 @@ func runReceive(
	httpMetricsBindAddr string,
	remoteWriteAddress string,
	dataDir string,
+	objStoreConfig *pathOrContent,
+	lset labels.Labels,
+	retention model.Duration,
 ) error {
	logger = log.With(logger, "component", "receive")
	level.Warn(logger).Log("msg", "setting up receive; the Thanos receive component is EXPERIMENTAL, it may break significantly without notice")
 
	tsdbCfg := &tsdb.Options{
-		Retention:        model.Duration(time.Hour * 24 * 15),
-		NoLockfile:       true,
-		MinBlockDuration: model.Duration(time.Hour * 2),
-		MaxBlockDuration: model.Duration(time.Hour * 2),
+		RetentionDuration: retention,
+		NoLockfile:        true,
+		MinBlockDuration:  model.Duration(time.Hour * 2),
+		MaxBlockDuration:  model.Duration(time.Hour * 2),
	}
 
	localStorage := &tsdb.ReadyStorage{}
@@ -189,7 +210,7 @@ func runReceive(
	}
 
	db := localStorage.Get()
-	tsdbStore := store.NewTSDBStore(log.With(logger, "component", "thanos-tsdb-store"), reg, db, component.Receive, nil)
+	tsdbStore := store.NewTSDBStore(log.With(logger, "component", "thanos-tsdb-store"), reg, db, component.Receive, lset)
 
	opts, err := defaultGRPCServerOpts(logger, reg, tracer, cert, key, clientCA)
	if err != nil {
@@ -225,6 +246,51 @@ func runReceive(
			},
		)
	}
+
+	confContentYaml, err := objStoreConfig.Content()
+	if err != nil {
+		return err
+	}
+
+	upload := true
+	if len(confContentYaml) == 0 {
+		level.Info(logger).Log("msg", "No supported bucket was configured, uploads will be disabled")
+		upload = false
+	}
+
+	if upload {
+		// The background shipper continuously scans the data directory and uploads
+		// new blocks to Google Cloud Storage or an S3-compatible storage service.
+		bkt, err := client.NewBucket(logger, confContentYaml, reg, component.Sidecar.String())
+		if err != nil {
+			return err
+		}
+
+		// Ensure we close up everything properly.
+		defer func() {
+			if err != nil {
+				runutil.CloseWithLogOnErr(logger, bkt, "bucket client")
+			}
+		}()
+
+		s := shipper.New(logger, reg, dataDir, bkt, func() labels.Labels { return lset }, metadata.ReceiveSource)
+
+		ctx, cancel := context.WithCancel(context.Background())
+		g.Add(func() error {
+			defer runutil.CloseWithLogOnErr(logger, bkt, "bucket client")
+
+			return runutil.Repeat(30*time.Second, ctx.Done(), func() error {
+				if uploaded, err := s.Sync(ctx); err != nil {
+					level.Warn(logger).Log("err", err, "uploaded", uploaded)
+				}
+
+				return nil
+			})
+		}, func(error) {
+			cancel()
+		})
+	}
+
	level.Info(logger).Log("msg", "starting receiver")
	return nil
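With the flags introduced above, a receiver that also ships its blocks to object storage could be started roughly like this (paths and the label are placeholders):

```bash
thanos receive \
  --tsdb.path=/var/thanos/receive \
  --tsdb.retention=15d \
  --labels='replica="receive-0"' \
  --objstore.config-file=bucket.yml
```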
diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go
index e71d57976c..62fe263ba2 100644
--- a/cmd/thanos/rule.go
+++ b/cmd/thanos/rule.go
@@ -39,7 +39,7 @@ import (
	"github.com/improbable-eng/thanos/pkg/tracing"
	"github.com/improbable-eng/thanos/pkg/ui"
	"github.com/oklog/run"
-	opentracing "github.com/opentracing/opentracing-go"
+	"github.com/opentracing/opentracing-go"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/model"
@@ -53,7 +53,7 @@ import (
	"github.com/prometheus/prometheus/util/strutil"
	"github.com/prometheus/tsdb/labels"
	"google.golang.org/grpc"
-	kingpin "gopkg.in/alecthomas/kingpin.v2"
+	"gopkg.in/alecthomas/kingpin.v2"
 )
 
 // registerRule registers a rule command.
@@ -104,6 +104,9 @@ func registerRule(m map[string]setupFunc, app *kingpin.Application, name string)
	dnsSDInterval := modelDuration(cmd.Flag("query.sd-dns-interval", "Interval between DNS resolutions.").
		Default("30s"))
 
+	dnsSDResolver := cmd.Flag("query.sd-dns-resolver", "Resolver to use. Possible options: [golang, miekgdns]").
+		Default("golang").Hidden().String()
+
	m[name] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ bool) error {
		lset, err := parseFlagLabels(*labelStrs)
		if err != nil {
@@ -119,10 +122,10 @@ func registerRule(m map[string]setupFunc, app *kingpin.Application, name string)
		}
 
		tsdbOpts := &tsdb.Options{
-			MinBlockDuration: *tsdbBlockDuration,
-			MaxBlockDuration: *tsdbBlockDuration,
-			Retention:        *tsdbRetention,
-			NoLockfile:       true,
+			MinBlockDuration:  *tsdbBlockDuration,
+			MaxBlockDuration:  *tsdbBlockDuration,
+			RetentionDuration: *tsdbRetention,
+			NoLockfile:        true,
		}
 
		lookupQueries := map[string]struct{}{}
@@ -173,6 +176,7 @@ func registerRule(m map[string]setupFunc, app *kingpin.Application, name string)
			*queries,
			fileSD,
			time.Duration(*dnsSDInterval),
+			*dnsSDResolver,
		)
	}
 }
@@ -206,6 +210,7 @@ func runRule(
	queryAddrs []string,
	fileSD *file.Discovery,
	dnsSDInterval time.Duration,
+	dnsSDResolver string,
 ) error {
	configSuccess := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "thanos_rule_config_last_reload_successful",
@@ -228,7 +233,7 @@ func runRule(
			Name: "thanos_rule_loaded_rules",
			Help: "Loaded rules partitioned by file and group",
		},
-		[]string{"part_resp_strategy", "file", "group"},
+		[]string{"strategy", "file", "group"},
	)
	ruleEvalWarnings := prometheus.NewCounterVec(
		prometheus.CounterOpts{
@@ -272,11 +277,12 @@ func runRule(
	dnsProvider := dns.NewProvider(
		logger,
		extprom.WrapRegistererWithPrefix("thanos_ruler_query_apis_", reg),
+		dns.ResolverType(dnsSDResolver),
	)
 
	// Run rule evaluation and alert notifications.
	var (
-		alertmgrs = newAlertmanagerSet(alertmgrURLs)
+		alertmgrs = newAlertmanagerSet(logger, alertmgrURLs, dns.ResolverType(dnsSDResolver))
		alertQ    = alert.NewQueue(logger, reg, 10000, 100, labelsTSDBToProm(lset), alertExcludeLabels)
		ruleMgrs  = thanosrule.Managers{}
	)
@@ -641,9 +647,9 @@ type alertmanagerSet struct {
	current  []*url.URL
 }
 
-func newAlertmanagerSet(addrs []string) *alertmanagerSet {
+func newAlertmanagerSet(logger log.Logger, addrs []string, dnsSDResolver dns.ResolverType) *alertmanagerSet {
	return &alertmanagerSet{
-		resolver: dns.NewResolver(),
+		resolver: dns.NewResolver(dnsSDResolver.ToResolver(logger)),
		addrs:    addrs,
	}
 }
diff --git a/cmd/thanos/sidecar.go b/cmd/thanos/sidecar.go
index 1e31b54d15..390ec14cc1 100644
--- a/cmd/thanos/sidecar.go
+++ b/cmd/thanos/sidecar.go
@@ -11,6 +11,7 @@ import (
 
	"github.com/go-kit/kit/log"
	"github.com/go-kit/kit/log/level"
+	"github.com/hashicorp/go-version"
	"github.com/improbable-eng/thanos/pkg/block/metadata"
	"github.com/improbable-eng/thanos/pkg/cluster"
	"github.com/improbable-eng/thanos/pkg/component"
@@ -25,6 +26,7 @@ import (
	opentracing "github.com/opentracing/opentracing-go"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/common/model"
	"github.com/prometheus/tsdb/labels"
	"google.golang.org/grpc"
	kingpin "gopkg.in/alecthomas/kingpin.v2"
@@ -111,6 +113,17 @@ func runSidecar(
		maxt: math.MaxInt64,
	}
 
+	confContentYaml, err := objStoreConfig.Content()
+	if err != nil {
+		return errors.Wrap(err, "getting object store config")
+	}
+
+	var uploads = true
+	if len(confContentYaml) == 0 {
+		level.Info(logger).Log("msg", "no supported bucket was configured, uploads will be disabled")
+		uploads = false
+	}
+
	// Setup all the concurrent groups.
	{
		promUp := prometheus.NewGauge(prometheus.GaugeOpts{
@@ -125,6 +138,29 @@ func runSidecar(
 
		ctx, cancel := context.WithCancel(context.Background())
		g.Add(func() error {
+			// Only check Prometheus's flags when upload is enabled.
+			if uploads {
+				// Retry infinitely until we get Prometheus version.
+				if err := runutil.Retry(2*time.Second, ctx.Done(), func() error {
+					if m.version, err = promclient.PromVersion(logger, m.promURL); err != nil {
+						level.Warn(logger).Log(
+							"msg", "failed to get Prometheus version. Is Prometheus running? Retrying",
+							"err", err,
+						)
+						return errors.Wrapf(err, "fetch Prometheus version")
+					}
+
+					return nil
+				}); err != nil {
+					return errors.Wrap(err, "fetch Prometheus version")
+				}
+
+				// Check Prometheus's flags to ensure sane sidecar flags.
+				if err := validatePrometheus(ctx, logger, m); err != nil {
+					return errors.Wrap(err, "validate Prometheus flags")
+				}
+			}
+
			// Blocking query of external labels before joining as a Source Peer into gossip.
			// We retry infinitely until we reach and fetch labels from our Prometheus.
			err := runutil.Retry(2*time.Second, ctx.Done(), func() error {
@@ -229,17 +265,6 @@ func runSidecar(
		})
	}
 
-	confContentYaml, err := objStoreConfig.Content()
-	if err != nil {
-		return err
-	}
-
-	var uploads = true
-	if len(confContentYaml) == 0 {
-		level.Info(logger).Log("msg", "No supported bucket was configured, uploads will be disabled")
-		uploads = false
-	}
-
	if uploads {
		// The background shipper continuously scans the data directory and uploads
		// new blocks to Google Cloud Storage or an S3-compatible storage service.
@@ -265,10 +290,7 @@ func runSidecar(
 
	var s *shipper.Shipper
	if uploadCompacted {
-		s, err = shipper.NewWithCompacted(ctx, logger, reg, dataDir, bkt, m.Labels, metadata.SidecarSource, m.promURL)
-		if err != nil {
-			return errors.Wrap(err, "create shipper")
-		}
+		s = shipper.NewWithCompacted(logger, reg, dataDir, bkt, m.Labels, metadata.SidecarSource)
	} else {
		s = shipper.New(logger, reg, dataDir, bkt, m.Labels, metadata.SidecarSource)
	}
@@ -298,13 +320,45 @@ func runSidecar(
	return nil
 }
 
+func validatePrometheus(ctx context.Context, logger log.Logger, m *promMetadata) error {
+	if m.version == nil {
+		level.Warn(logger).Log("msg", "fetched version is nil or invalid. Unable to know whether Prometheus supports /version endpoint, skip validation")
+		return nil
+	}
+
+	if m.version.LessThan(promclient.FlagsVersion) {
+		level.Warn(logger).Log("msg",
+			"Prometheus doesn't support flags endpoint, skip validation", "version", m.version.Original())
+		return nil
+	}
+
+	flags, err := promclient.ConfiguredFlags(ctx, logger, m.promURL)
+	if err != nil {
+		return errors.Wrap(err, "failed to check flags")
+	}
+
+	// Check if compaction is disabled.
+	if flags.TSDBMinTime != flags.TSDBMaxTime {
+		return errors.Errorf("found that TSDB Max time is %s and Min time is %s. "+
+			"Compaction needs to be disabled (storage.tsdb.min-block-duration = storage.tsdb.max-block-duration)", flags.TSDBMaxTime, flags.TSDBMinTime)
+	}
+
+	// Check if block time is 2h.
+	if flags.TSDBMinTime != model.Duration(2*time.Hour) {
+		level.Warn(logger).Log("msg", "found that TSDB block time is not 2h. Only 2h block time is recommended.", "block-time", flags.TSDBMinTime)
+	}
+
+	return nil
+}
+
 type promMetadata struct {
	promURL *url.URL
 
-	mtx    sync.Mutex
-	mint   int64
-	maxt   int64
-	labels labels.Labels
+	mtx     sync.Mutex
+	mint    int64
+	maxt    int64
+	labels  labels.Labels
+	version *version.Version
 }
 
 func (s *promMetadata) UpdateLabels(ctx context.Context, logger log.Logger) error {
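The `validatePrometheus` check above requires local compaction to be disabled and warns unless blocks are 2h; a matching Prometheus invocation would be something like this (the retention value is illustrative):

```bash
prometheus \
  --storage.tsdb.min-block-duration=2h \
  --storage.tsdb.max-block-duration=2h \
  --storage.tsdb.retention.time=2d
```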
@@ -265,10 +290,7 @@ func runSidecar( var s *shipper.Shipper if uploadCompacted { - s, err = shipper.NewWithCompacted(ctx, logger, reg, dataDir, bkt, m.Labels, metadata.SidecarSource, m.promURL) - if err != nil { - return errors.Wrap(err, "create shipper") - } + s = shipper.NewWithCompacted(logger, reg, dataDir, bkt, m.Labels, metadata.SidecarSource) } else { s = shipper.New(logger, reg, dataDir, bkt, m.Labels, metadata.SidecarSource) } @@ -298,13 +320,45 @@ func runSidecar( return nil } +func validatePrometheus(ctx context.Context, logger log.Logger, m *promMetadata) error { + if m.version == nil { + level.Warn(logger).Log("msg", "fetched version is nil or invalid. Unable to know whether Prometheus supports /version endpoint, skip validation") + return nil + } + + if m.version.LessThan(promclient.FlagsVersion) { + level.Warn(logger).Log("msg", + "Prometheus doesn't support flags endpoint, skip validation", "version", m.version.Original()) + return nil + } + + flags, err := promclient.ConfiguredFlags(ctx, logger, m.promURL) + if err != nil { + return errors.Wrap(err, "failed to check flags") + } + + // Check if compaction is disabled. + if flags.TSDBMinTime != flags.TSDBMaxTime { + return errors.Errorf("found that TSDB Max time is %s and Min time is %s. "+ + "Compaction needs to be disabled (storage.tsdb.min-block-duration = storage.tsdb.max-block-duration)", flags.TSDBMaxTime, flags.TSDBMinTime) + } + + // Check if block time is 2h. + if flags.TSDBMinTime != model.Duration(2*time.Hour) { + level.Warn(logger).Log("msg", "found that TSDB block time is not 2h. Only 2h block time is recommended.", "block-time", flags.TSDBMinTime) + } + + return nil +} + type promMetadata struct { promURL *url.URL - mtx sync.Mutex - mint int64 - maxt int64 - labels labels.Labels + mtx sync.Mutex + mint int64 + maxt int64 + labels labels.Labels + version *version.Version } func (s *promMetadata) UpdateLabels(ctx context.Context, logger log.Logger) error { diff --git a/cmd/thanos/store.go b/cmd/thanos/store.go index 7f77e15412..c135956403 100644 --- a/cmd/thanos/store.go +++ b/cmd/thanos/store.go @@ -12,13 +12,14 @@ import ( "github.com/improbable-eng/thanos/pkg/objstore/client" "github.com/improbable-eng/thanos/pkg/runutil" "github.com/improbable-eng/thanos/pkg/store" + storecache "github.com/improbable-eng/thanos/pkg/store/cache" "github.com/improbable-eng/thanos/pkg/store/storepb" "github.com/oklog/run" - "github.com/opentracing/opentracing-go" + opentracing "github.com/opentracing/opentracing-go" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "google.golang.org/grpc" - "gopkg.in/alecthomas/kingpin.v2" + kingpin "gopkg.in/alecthomas/kingpin.v2" ) // registerStore registers a store command. @@ -120,12 +121,23 @@ func runStore( } }() + // TODO(bwplotka): Add as a flag? 
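+	// Heuristic: cap a single item at half of the total cache size so that one
+	// oversized entry cannot fill the whole index cache on its own.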
+ maxItemSizeBytes := indexCacheSizeBytes / 2 + + indexCache, err := storecache.NewIndexCache(logger, reg, storecache.Opts{ + MaxSizeBytes: indexCacheSizeBytes, + MaxItemSizeBytes: maxItemSizeBytes, + }) + if err != nil { + return errors.Wrap(err, "create index cache") + } + bs, err := store.NewBucketStore( logger, reg, bkt, dataDir, - indexCacheSizeBytes, + indexCache, chunkPoolSizeBytes, maxSampleCount, maxConcurrent, diff --git a/docs/components/bucket.md b/docs/components/bucket.md index e887a036ec..380f24d222 100644 --- a/docs/components/bucket.md +++ b/docs/components/bucket.md @@ -1,3 +1,9 @@ +--- +title: Bucket +type: docs +menu: components +--- + # Bucket The bucket component of Thanos is a set of commands to inspect data in object storage buckets. @@ -5,7 +11,7 @@ It is normally run as a stand alone command to aid with troubleshooting. Example: -``` +```bash $ thanos bucket verify --objstore.config-file=bucket.yml ``` diff --git a/docs/components/compact.md b/docs/components/compact.md index efd1455fc9..439adc520b 100644 --- a/docs/components/compact.md +++ b/docs/components/compact.md @@ -1,3 +1,9 @@ +--- +title: Compact +type: docs +menu: components +--- + # Compact The compactor component of Thanos applies the compaction procedure of the Prometheus 2.0 storage engine to block data stored in object storage. @@ -5,7 +11,7 @@ It is generally not semantically concurrency safe and must be deployed as a sing Example: -``` +```bash $ thanos compact --data-dir /tmp/thanos-compact --objstore.config-file=bucket.yml ``` @@ -31,44 +37,48 @@ usage: thanos compact [] continuously compacts blocks in an object store bucket Flags: - -h, --help Show context-sensitive help (also try --help-long and - --help-man). - --version Show application version. - --log.level=info Log filtering level. - --log.format=logfmt Log format to use. + -h, --help Show context-sensitive help (also try --help-long + and --help-man). + --version Show application version. + --log.level=info Log filtering level. + --log.format=logfmt Log format to use. --gcloudtrace.project=GCLOUDTRACE.PROJECT - GCP project to send Google Cloud Trace tracings to. - If empty, tracing will be disabled. + GCP project to send Google Cloud Trace tracings + to. If empty, tracing will be disabled. --gcloudtrace.sample-factor=1 - How often we send traces (1/). If 0 no - trace will be sent periodically, unless forced by - baggage item. See `pkg/tracing/tracing.go` for - details. + How often we send traces (1/). If + 0 no trace will be sent periodically, unless + forced by baggage item. See + `pkg/tracing/tracing.go` for details. --http-address="0.0.0.0:10902" - Listen host:port for HTTP endpoints. - --data-dir="./data" Data directory in which to cache blocks and process - compactions. + Listen host:port for HTTP endpoints. + --data-dir="./data" Data directory in which to cache blocks and + process compactions. --objstore.config-file= - Path to YAML file that contains object store - configuration. + Path to YAML file that contains object store + configuration. --objstore.config= - Alternative to 'objstore.config-file' flag. Object - store configuration in YAML. - --sync-delay=30m Minimum age of fresh (non-compacted) blocks before - they are being processed. + Alternative to 'objstore.config-file' flag. + Object store configuration in YAML. + --consistency-delay=30m Minimum age of fresh (non-compacted) blocks + before they are being processed. Malformed blocks + older than the maximum of consistency-delay and + 30m0s will be removed. 
--retention.resolution-raw=0d - How long to retain raw samples in bucket. 0d - - disables this retention + How long to retain raw samples in bucket. 0d - + disables this retention --retention.resolution-5m=0d - How long to retain samples of resolution 1 (5 - minutes) in bucket. 0d - disables this retention + How long to retain samples of resolution 1 (5 + minutes) in bucket. 0d - disables this retention --retention.resolution-1h=0d - How long to retain samples of resolution 2 (1 hour) - in bucket. 0d - disables this retention - -w, --wait Do not exit after all compactions have been processed - and wait for new work. + How long to retain samples of resolution 2 (1 + hour) in bucket. 0d - disables this retention + -w, --wait Do not exit after all compactions have been + processed and wait for new work. --block-sync-concurrency=20 - Number of goroutines to use when syncing block - metadata from object storage. + Number of goroutines to use when syncing block + metadata from object storage. + --compact.concurrency=1 Number of goroutines to use when compacting + groups. ``` diff --git a/docs/components/query.md b/docs/components/query.md index cd38da1053..7ce69b9d50 100644 --- a/docs/components/query.md +++ b/docs/components/query.md @@ -1,17 +1,23 @@ +--- +title: Query +type: docs +menu: components +--- + # Query The query component implements the Prometheus HTTP v1 API to query data in a Thanos cluster via PromQL. -It gathers the data needed to evaluate the query from underlying StoreAPIs. See [here](/docs/service_discovery.md) +It gathers the data needed to evaluate the query from underlying StoreAPIs. See [here](../service-discovery.md) on how to connect querier with desired StoreAPIs. Querier currently is fully stateless and horizontally scalable. -``` +```bash $ thanos query \ --http-address "0.0.0.0:9090" \ --store ":" \ - --store ":" \ + --store ":" ``` ## Deduplication @@ -193,44 +199,54 @@ Flags: CA is specified, there is no client verification on server side. (tls.NoClientCert) --grpc-advertise-address=GRPC-ADVERTISE-ADDRESS + Deprecated(gossip will be removed from v0.5.0): Explicit (external) host:port address to advertise for gRPC StoreAPI in gossip cluster. If empty, 'grpc-address' will be used. --cluster.address="0.0.0.0:10900" + Deprecated(gossip will be removed from v0.5.0): Listen ip:port address for gossip cluster. --cluster.advertise-address=CLUSTER.ADVERTISE-ADDRESS + Deprecated(gossip will be removed from v0.5.0): Explicit (external) ip:port address to advertise for gossip in gossip cluster. Used internally for membership only. --cluster.peers=CLUSTER.PEERS ... + Deprecated(gossip will be removed from v0.5.0): Initial peers to join the cluster. It can be either , or . A lookup resolution is done only at the startup. --cluster.gossip-interval= + Deprecated(gossip will be removed from v0.5.0): Interval between sending gossip messages. By lowering this value (more frequent) gossip messages are propagated across the cluster more quickly at the expense of increased bandwidth. Default is used from a specified network-type. --cluster.pushpull-interval= + Deprecated(gossip will be removed from v0.5.0): Interval for gossip state syncs. Setting this interval lower (more frequent) will increase convergence speeds across larger clusters at the expense of increased bandwidth usage. Default is used from a specified network-type. --cluster.refresh-interval=1m + Deprecated(gossip will be removed from v0.5.0): Interval for membership to refresh cluster.peers state, 0 disables refresh. 
      --cluster.secret-key=CLUSTER.SECRET-KEY
+                            Deprecated(gossip will be removed from v0.5.0):
                             Initial secret key to encrypt cluster gossip. Can be
                             one of AES-128, AES-192, or AES-256 in hexadecimal
                             format.
      --cluster.network-type=lan
+                            Deprecated(gossip will be removed from v0.5.0):
                             Network type with predefined peers configurations.
                             Sets of configurations accounting the latency
                             differences between network types: local, lan, wan.
-      --cluster.disable     If true gossip will be disabled and no cluster
+      --cluster.disable     Deprecated(gossip will be removed from v0.5.0):
+                            If true gossip will be disabled and no cluster
                             related server will be started.
      --http-advertise-address=HTTP-ADVERTISE-ADDRESS
                             Explicit (external) host:port address to
@@ -293,11 +309,17 @@ Flags:
                             is used as a resync fallback.
      --store.sd-dns-interval=30s
                             Interval between DNS resolutions.
+      --store.unhealthy-timeout=5m
+                            Timeout before an unhealthy store is cleaned
+                            from the store UI page.
      --query.auto-downsampling
                             Enable automatic adjustment (step / 5) to what
                             source of data should be used in store gateways
                             if no max_source_resolution param is specified.
      --query.partial-response
                             Enable partial response for queries if no
                             partial_response param is specified.
+      --query.default-evaluation-interval=1m
+                            Set default evaluation interval for sub
+                            queries.
      --store.response-timeout=0ms
                             If a Store doesn't send any data in this
                             specified duration then a Store will be ignored
diff --git a/docs/components/rule.md b/docs/components/rule.md
index fb7304d757..5e607c1740 100644
--- a/docs/components/rule.md
+++ b/docs/components/rule.md
@@ -1,3 +1,9 @@
+---
+title: Rule
+type: docs
+menu: components
+---
+
 # Rule (aka Ruler)
 
 _**NOTE:** It is recommended to keep deploying rules inside the relevant Prometheus servers locally. Use ruler only in specific cases. Read details [below](rule.md#Risk) why._
@@ -13,7 +19,7 @@ You can think of Rule as a simplified Prometheus that does not require a sidecar
 The data of each Rule node can be labeled to satisfy the cluster's labeling scheme. High-availability pairs can be run in parallel and should be distinguished by the designated replica label, just like regular Prometheus servers.
 Read more about Ruler in HA [here](rule.md#Ruler_HA)
 
-```
+```bash
 $ thanos rule \
     --data-dir "/path/to/data" \
     --eval-interval "30s" \
@@ -175,44 +181,54 @@ Flags:
                               CA is specified, there is no client verification
                               on server side. (tls.NoClientCert)
      --grpc-advertise-address=GRPC-ADVERTISE-ADDRESS
+                              Deprecated(gossip will be removed from v0.5.0):
                               Explicit (external) host:port address to
                               advertise for gRPC StoreAPI in gossip cluster.
                               If empty, 'grpc-address' will be used.
      --cluster.address="0.0.0.0:10900"
+                              Deprecated(gossip will be removed from v0.5.0):
                               Listen ip:port address for gossip cluster.
      --cluster.advertise-address=CLUSTER.ADVERTISE-ADDRESS
+                              Deprecated(gossip will be removed from v0.5.0):
                               Explicit (external) ip:port address to advertise
                               for gossip in gossip cluster. Used internally
                               for membership only.
      --cluster.peers=CLUSTER.PEERS ...
+                              Deprecated(gossip will be removed from v0.5.0):
                               Initial peers to join the cluster. It can be
                               either <ip:port>, or <domain:port>. A lookup
                               resolution is done only at the startup.
      --cluster.gossip-interval=<gossip interval>
+                              Deprecated(gossip will be removed from v0.5.0):
                               Interval between sending gossip messages. By
                               lowering this value (more frequent) gossip
                               messages are propagated across the cluster more
                               quickly at the expense of increased bandwidth.
                               Default is used from a specified network-type.
      --cluster.pushpull-interval=<push-pull interval>
+                              Deprecated(gossip will be removed from v0.5.0):
                               Interval for gossip state syncs. Setting this
                               interval lower (more frequent) will increase
                               convergence speeds across larger clusters at the
                               expense of increased bandwidth usage.
                               Default is used from a specified network-type.
      --cluster.refresh-interval=1m
+                              Deprecated(gossip will be removed from v0.5.0):
                               Interval for membership to refresh cluster.peers
                               state, 0 disables refresh.
      --cluster.secret-key=CLUSTER.SECRET-KEY
+                              Deprecated(gossip will be removed from v0.5.0):
                               Initial secret key to encrypt cluster gossip.
                               Can be one of AES-128, AES-192, or AES-256 in
                               hexadecimal format.
      --cluster.network-type=lan
+                              Deprecated(gossip will be removed from v0.5.0):
                               Network type with predefined peers
                               configurations. Sets of configurations
                               accounting the latency differences between
                               network types: local, lan, wan.
-      --cluster.disable       If true gossip will be disabled and no cluster
+      --cluster.disable       Deprecated(gossip will be removed from v0.5.0):
+                              If true gossip will be disabled and no cluster
                               related server will be started.
      --label=<name>="<value>" ...
                               Labels to be applied to all generated metrics
diff --git a/docs/components/sidecar.md b/docs/components/sidecar.md
index 4966681a1b..58ae7d4628 100644
--- a/docs/components/sidecar.md
+++ b/docs/components/sidecar.md
@@ -1,3 +1,9 @@
+---
+title: Sidecar
+type: docs
+menu: components
+---
+
 # Sidecar
 
 The sidecar component of Thanos gets deployed along with a Prometheus instance. It implements Thanos' Store API on top of Prometheus' remote-read API and advertises itself as a data source to the cluster. Thereby queriers in the cluster can treat Prometheus servers as yet another source of time series data without directly talking to its APIs.
@@ -8,19 +14,20 @@ Prometheus servers connected to the Thanos cluster via the sidecar are subject t
 * The minimum Prometheus version is 2.2.1
 * The `external_labels` section of the configuration is in line with the desired label scheme (will be used by query layer to filter out store APIs to query).
 * The `--web.enable-lifecycle` flag is enabled if you want to use `reload.*` flags.
-* The `--storage.tsdb.min-block-duration` and `--storage.tsdb.max-block-duration` must be set to equal values to disable local compaction. The default of `2h` is recommended.
+* The `--storage.tsdb.min-block-duration` and `--storage.tsdb.max-block-duration` must be set to equal values to disable local compaction in order to use the Thanos sidecar upload. Leave local compaction on if the sidecar just exposes the StoreAPI and your retention is normal. The default of `2h` is recommended.
+  Setting these parameters to equal values disables the internal Prometheus compaction, which is needed to avoid corrupting uploaded data when the Thanos compactor does its job. This is critical for data consistency and should not be ignored if you plan to use the Thanos compactor. Even with the parameters set equal, you might observe the Prometheus internal metric `prometheus_tsdb_compactions_total` being incremented; don't be confused by that: Prometheus writes the initial head block to the filesystem via its internal compaction mechanism, but if you followed the recommendations, data won't be modified by Prometheus before the sidecar uploads it.
 
 The Thanos sidecar will also check the sanity of the flags set on Prometheus at startup and log errors or warnings if they have been configured improperly (#838).
 
 The retention is recommended to not be lower than three times the block duration.
This achieves resilience in the face of connectivity issues to the object storage since all local data will remain available within the Thanos cluster. If connectivity gets restored the backlog of blocks gets uploaded to the object storage. -```console +```bash $ prometheus \ --storage.tsdb.max-block-duration=2h \ --storage.tsdb.min-block-duration=2h \ --web.enable-lifecycle ``` -```console +```bash $ thanos sidecar \ --tsdb.path "/path/to/prometheus/data/dir" \ --prometheus.url "http://localhost:9090" \ @@ -76,44 +83,54 @@ Flags: CA is specified, there is no client verification on server side. (tls.NoClientCert) --grpc-advertise-address=GRPC-ADVERTISE-ADDRESS + Deprecated(gossip will be removed from v0.5.0): Explicit (external) host:port address to advertise for gRPC StoreAPI in gossip cluster. If empty, 'grpc-address' will be used. --cluster.address="0.0.0.0:10900" + Deprecated(gossip will be removed from v0.5.0): Listen ip:port address for gossip cluster. --cluster.advertise-address=CLUSTER.ADVERTISE-ADDRESS + Deprecated(gossip will be removed from v0.5.0): Explicit (external) ip:port address to advertise for gossip in gossip cluster. Used internally for membership only. --cluster.peers=CLUSTER.PEERS ... + Deprecated(gossip will be removed from v0.5.0): Initial peers to join the cluster. It can be either , or . A lookup resolution is done only at the startup. --cluster.gossip-interval= + Deprecated(gossip will be removed from v0.5.0): Interval between sending gossip messages. By lowering this value (more frequent) gossip messages are propagated across the cluster more quickly at the expense of increased bandwidth. Default is used from a specified network-type. --cluster.pushpull-interval= + Deprecated(gossip will be removed from v0.5.0): Interval for gossip state syncs. Setting this interval lower (more frequent) will increase convergence speeds across larger clusters at the expense of increased bandwidth usage. Default is used from a specified network-type. --cluster.refresh-interval=1m + Deprecated(gossip will be removed from v0.5.0): Interval for membership to refresh cluster.peers state, 0 disables refresh. --cluster.secret-key=CLUSTER.SECRET-KEY + Deprecated(gossip will be removed from v0.5.0): Initial secret key to encrypt cluster gossip. Can be one of AES-128, AES-192, or AES-256 in hexadecimal format. --cluster.network-type=lan + Deprecated(gossip will be removed from v0.5.0): Network type with predefined peers configurations. Sets of configurations accounting the latency differences between network types: local, lan, wan. - --cluster.disable If true gossip will be disabled and no cluster + --cluster.disable Deprecated(gossip will be removed from v0.5.0): + If true gossip will be disabled and no cluster related server will be started. --prometheus.url=http://localhost:9090 URL at which to reach Prometheus's API. For diff --git a/docs/components/store.md b/docs/components/store.md index 7d26d79627..462b4a4142 100644 --- a/docs/components/store.md +++ b/docs/components/store.md @@ -1,9 +1,15 @@ +--- +title: Store +type: docs +menu: components +--- + # Store The store component of Thanos implements the Store API on top of historical data in an object storage bucket. It acts primarily as an API gateway and therefore does not need significant amounts of local disk space. It joins a Thanos cluster on startup and advertises the data it can access. It keeps a small amount of information about all remote blocks on local disk and keeps it in sync with the bucket. 
This data is generally safe to delete across restarts at the cost of increased startup times. -``` +```bash $ thanos store \ --data-dir "/local/state/data/dir" \ --cluster.peers "thanos-cluster.example.org" \ @@ -61,44 +67,54 @@ Flags: CA is specified, there is no client verification on server side. (tls.NoClientCert) --grpc-advertise-address=GRPC-ADVERTISE-ADDRESS + Deprecated(gossip will be removed from v0.5.0): Explicit (external) host:port address to advertise for gRPC StoreAPI in gossip cluster. If empty, 'grpc-address' will be used. --cluster.address="0.0.0.0:10900" + Deprecated(gossip will be removed from v0.5.0): Listen ip:port address for gossip cluster. --cluster.advertise-address=CLUSTER.ADVERTISE-ADDRESS + Deprecated(gossip will be removed from v0.5.0): Explicit (external) ip:port address to advertise for gossip in gossip cluster. Used internally for membership only. --cluster.peers=CLUSTER.PEERS ... + Deprecated(gossip will be removed from v0.5.0): Initial peers to join the cluster. It can be either , or . A lookup resolution is done only at the startup. --cluster.gossip-interval= + Deprecated(gossip will be removed from v0.5.0): Interval between sending gossip messages. By lowering this value (more frequent) gossip messages are propagated across the cluster more quickly at the expense of increased bandwidth. Default is used from a specified network-type. --cluster.pushpull-interval= + Deprecated(gossip will be removed from v0.5.0): Interval for gossip state syncs. Setting this interval lower (more frequent) will increase convergence speeds across larger clusters at the expense of increased bandwidth usage. Default is used from a specified network-type. --cluster.refresh-interval=1m + Deprecated(gossip will be removed from v0.5.0): Interval for membership to refresh cluster.peers state, 0 disables refresh. --cluster.secret-key=CLUSTER.SECRET-KEY + Deprecated(gossip will be removed from v0.5.0): Initial secret key to encrypt cluster gossip. Can be one of AES-128, AES-192, or AES-256 in hexadecimal format. --cluster.network-type=lan + Deprecated(gossip will be removed from v0.5.0): Network type with predefined peers configurations. Sets of configurations accounting the latency differences between network types: local, lan, wan. - --cluster.disable If true gossip will be disabled and no cluster + --cluster.disable Deprecated(gossip will be removed from v0.5.0): + If true gossip will be disabled and no cluster related server will be started. --data-dir="./data" Data directory in which to cache remote blocks. --index-cache-size=250MB Maximum size of items held in the index cache. diff --git a/docs/troubleshooting/dev.md b/docs/contributing/dev.md similarity index 91% rename from docs/troubleshooting/dev.md rename to docs/contributing/dev.md index 4955450e84..4fe58d7802 100644 --- a/docs/troubleshooting/dev.md +++ b/docs/contributing/dev.md @@ -1,4 +1,8 @@ -# Troubleshooting for dev workflow +--- +title: Troubleshooting for dev workflow +type: doc +menu: contributing +--- ## Dep `grouped write of manifest, lock and vendor: scratch directory ... 
already exists, please remove it`
diff --git a/docs/contributing/how-to-contribute-to-docs.md b/docs/contributing/how-to-contribute-to-docs.md
new file mode 100644
index 0000000000..8903751333
--- /dev/null
+++ b/docs/contributing/how-to-contribute-to-docs.md
@@ -0,0 +1,94 @@
+---
+title: Contribute to docs
+type: docs
+menu: contributing
+---
+
+# How to contribute to Docs/Website
+
+The `./docs` directory holds the markdown source files that are rendered into the Thanos website resources using [blackfriday](https://github.com/russross/blackfriday).
+
+However, the aim is to also have those `*.md` files renderable and usable (including links) via GitHub.
+
+To make that happen, we use the following rules and helpers, listed here.
+
+## Front Matter
+
+[Front Matter](https://gohugo.io/content-management/front-matter/) is essential on top of every markdown file if
+you want to link this file into any menu/submenu option. We use YAML formatting. This will render
+in GitHub as markdown just fine:
+
+```md
+
+---
+title: <title>
+type: ...
+weight: <weight>
+menu: <menu>  # This is also referred to in permalinks.
+---
+
+```
+
+## Links
+
+The aim is for linking behavior on the website to be THE SAME as on GitHub. This means:
+
+* For files in the Hugo dir (so `./docs`), put `slug: /<filename>.md`
+* For any sub dir, add the new dir to `website/hugo.yaml` as a key under `permalinks:`, with `<dir name>: /<dir name>/:filename.md`
+
+Then everywhere use native markdown *relative* symbolic links if you want to reference some markdown file from `docs`:
+
+`[title]( relative path to .md file )`
+
+Or an absolute path within the project repository if you want to link to an exact commit, e.g.:
+
+`[title]( /Makefile )`
+
+A small [post processing script](/scripts/websitepreprocess.sh) adjusts links for Hugo rendering.
+
+NOTE: Spaces matter, so `[xxx]( link` and `[xxx] (link` will not work.
+
+Why?
+
+* Links work on GitHub
+* Links work on the website
+* Markdown plugins work as expected (e.g. IDE integrations)
+* We use liche to test links.
+
+## Sections/Menu
+
+New menus `.Site.Menus` are added as soon as some file has Front Matter with a certain `menu`.
+
+Keep `menu` the same as the sub-directory the file is in. This will help to manage all docs.
+
+Show a new menu section on the main page by changing the `website/layouts/_default/baseof.html` file.
+
+## Logos
+
+If your company is using Thanos in production, we'd love to showcase your company's logo on our page. Add yourself in [`website/data/sponsors.yml`](/website/data/sponsors.yml) like so:
+
+```yml
+- name: My Awesome Company
+  url: https://www.company.com
+  logo: company.png
+```
+
+Copy your company's logo into `website/static/logos`, and make sure it follows these rules:
+
+* Rectangle shape
+* Greyscale is preferred but color is fine
+* Keep it under 50KB
+
+## Testing
+
+Every PR builds the website and, on success, shows a link to the preview.
+
+## Deployment
+
+We use [Netlify](https://www.netlify.com/) for hosting. We are using the Open Source (PRO) license. Thanks Netlify for this!
+
+On every commit to `master`, netlify runs CI that invokes `make web` (defined in [netlify.toml](https://github.com/improbable-eng/thanos/blob/master/netlify.toml)).
+
+NOTE: Check the status badge in the README for the build status of the page.
+
+If the master build for netlify succeeds, the new content is published automatically.
\ No newline at end of file
diff --git a/docs/design.md b/docs/design.md
index 803632c1bd..443d3045c8 100644
--- a/docs/design.md
+++ b/docs/design.md
@@ -1,3 +1,10 @@
+---
+title: Design
+type: docs
+menu: thanos
+slug: /design.md
+---
+
 # Design
 
 Thanos is a set of components that can be composed into a highly available Prometheus setup with long term storage capabilities. Its main goals are operation simplicity and retaining of Prometheus's reliability properties.
@@ -24,7 +31,7 @@ The sidecar implements the gRPC service on top of Prometheus' [HTTP and remote-r
 Data sources that persist their data for long-term storage do so via the Prometheus 2.0 storage engine. The storage engine periodically produces immutable blocks of data for a fixed time range. A block is a directory with a handful of larger files containing all sample data and persisted indices that are required to retrieve the data:
 
-```
+```bash
 01BX6V6TY06G5MFQ0GPH7EMXRH
 ├── chunks
 │   ├── 000001
 │   ├── 000002
 │   └── 000003
 ├── index
 └── meta.json
 ```
 
-
 A block's top-level directory is a ULID (like UUID but lexicographically sortable and encoding the creation time).
 
 * Chunk files hold a few hundred MB worth of chunks each. Chunks for the same series are sequentially aligned. Series in return are aligned by their metric name. This becomes relevant further down.
 * The index file holds all information needed to look up specific series by their labels and the positions of their chunks.
 * `meta.json` holds meta information about a block like stats, time range, and compaction level.
 
-
 Those block files can be backed up to an object storage and later be queried by another component (see below). All data is uploaded as it is created by the Prometheus server/storage engine. The `meta.json` file may be extended by a `thanos` section, to which Thanos-specific metadata can be added. Currently it includes the "external labels" the producer of the block has assigned. This later helps in filtering blocks for querying without accessing their data files. The meta.json is updated during upload time on sidecars.
diff --git a/docs/getting-started.md b/docs/getting-started.md
new file mode 100644
index 0000000000..d7d53d203e
--- /dev/null
+++ b/docs/getting-started.md
@@ -0,0 +1,263 @@
+---
+title: Getting Started
+type: docs
+menu: thanos
+weight: 1
+slug: /getting-started.md
+---
+
+# Getting started
+
+Thanos provides a global query view, data backup, and historical data access as its core features in a single binary. All three features can be run independently of each other. This allows you to have a subset of Thanos features ready for immediate benefit or testing, while also making it flexible for gradual roll outs in more complex environments.
+
+In this quick-start guide, we will configure Thanos and all components mentioned to work against a Google Cloud Storage bucket.
+At the moment, Thanos is able to use [different storage providers](storage.md), with the ability to add more providers as necessary.
+
+Thanos will work in cloud native environments as well as more traditional ones. Some users run Thanos in Kubernetes while others run it on bare metal. More deployment examples and stories will be described soon.
+
+## Architecture Overview
+
+<img src="img/arch.jpg" class="img-fluid" alt="architecture overview" />
+
+## Requirements
+
+* One or more [Prometheus](https://prometheus.io) v2.2.1+ installations
+* golang 1.10+
+* An object storage bucket (optional)
+
+## Get Thanos!
+
+You can find the latest Thanos release [here](https://github.com/improbable-eng/thanos/releases).
+
+
+If you want to build Thanos from source, make sure you have installed `bzr` and `git`. `bzr` is required because `go` modules fetch each dependency with whatever VCS it uses, and in our case a single dependency uses `bzr`.
+Also make sure you have a working installation of the Go [toolchain](https://github.com/golang/tools) (`GOPATH`, `PATH=${GOPATH}/bin:${PATH}`). Thanos can then be downloaded and built by running:
+
+```bash
+go get -d github.com/improbable-eng/thanos/...
+cd ${GOPATH}/src/github.com/improbable-eng/thanos
+make
+```
+
+The `thanos` binary should now be in your `$PATH` and is the only thing required to deploy any of its components.
+
+You may encounter the following error:
+
+```
+go: verifying github.com/grpc-ecosystem/go-grpc-middleware@v1.0.0: checksum mismatch
+	downloaded: h1:BWIsLfhgKhV5g/oF34aRjniBHLTZe5DNekSjbAjIS6c=
+	go.sum:     h1:Iju5GlWwrvL6UBg4zJJt3btmonfrMlCDdsejg4CZE7c=
+Makefile:183: recipe for target 'go-mod-tidy' failed
+```
+
+If your `golang` version is `1.11.4`, you can run the following command and then `make` should pass:
+
+```
+go clean -modcache
+```
+
+If your `golang` version is below `1.11.4`, we highly recommend upgrading to `1.11.4` or above.
+
+## Prometheus
+
+Thanos bases itself on vanilla [Prometheus](https://prometheus.io/) (v2.2.1+).
+
+Here are the Prometheus versions Thanos is tested against:
+
+[Makefile](/Makefile#35)
+
+## Components
+
+Following the KISS and Unix philosophies, Thanos is made of a set of components with each filling a specific role.
+
+* Sidecar: connects to Prometheus and reads its data for queries and/or uploads it to cloud storage
+* Store Gateway: exposes the content of a cloud storage bucket
+* Compactor: compacts and downsamples data stored in remote storage
+* Receiver: receives data from Prometheus' remote-write WAL, exposes it and/or uploads it to cloud storage
+* Ruler: evaluates recording and alerting rules against data in Thanos for exposition and/or upload
+* Query Gateway: implements Prometheus' v1 API to aggregate data from the underlying components
+
+### [Sidecar](components/sidecar.md)
+
+Thanos integrates with existing Prometheus servers through a [Sidecar process](https://docs.microsoft.com/en-us/azure/architecture/patterns/sidecar#solution), which runs on the same machine or in the same pod as the Prometheus server.
+
+The purpose of the Sidecar is to back up Prometheus data into an Object Storage bucket and to give other Thanos components access to the Prometheus instance the Sidecar is attached to via a gRPC API.
+
+The Sidecar makes use of the `reload` Prometheus endpoint. Make sure it's enabled with the flag `--web.enable-lifecycle`.
+
+#### External storage
+
+The following configures the sidecar to write Prometheus' data into a configured object storage:
+
+```bash
+thanos sidecar \
+    --tsdb.path            /var/prometheus \          # TSDB data directory of Prometheus
+    --prometheus.url       "http://localhost:9090" \  # Be sure that the sidecar can use this url!
+    --objstore.config-file bucket_config.yaml \       # Storage configuration for uploading data
+```
+
+The format of the YAML file depends on the provider you choose; a minimal sketch follows below. Example configurations and an up-to-date list of the storage types Thanos supports are available [here](storage.md).
+
+Rolling this out has little to zero impact on the running Prometheus instance. It is a good start to ensure you are backing up your data while figuring out the other pieces of Thanos.
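+
+For example, assuming the Google Cloud Storage bucket used throughout this guide, a minimal `bucket_config.yaml` might look like the sketch below; `thanos-example-bucket` is a placeholder name, and the authoritative format for every provider lives in [storage.md](storage.md):
+
+```yaml
+type: GCS
+config:
+  bucket: "thanos-example-bucket"  # placeholder; use your own bucket name
+```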
+
+If you are not interested in backing up any data, the `--objstore.config-file` flag can simply be omitted.
+
+* _[Example Kubernetes manifest](https://github.com/improbable-eng/thanos/tree/master/tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar.yaml)_
+* _[Example Kubernetes manifest with Minio upload](https://github.com/improbable-eng/thanos/tree/master/tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar-lts.yaml)_
+* _[Example Deploying sidecar using official Prometheus Helm Chart](https://github.com/improbable-eng/thanos/tree/master/tutorials/kubernetes-helm/README.md)_
+* _[Details & Config for other object stores](storage.md)_
+
+#### Store API
+
+The Sidecar component implements and exposes a gRPC _[Store API](https://github.com/improbable-eng/thanos/tree/master/pkg/store/storepb/rpc.proto#L19)_. The sidecar implementation allows you to query the metric data stored in Prometheus.
+
+Let's extend the Sidecar from the previous section to connect to a Prometheus server and expose the Store API.
+
+```bash
+thanos sidecar \
+    --tsdb.path            /var/prometheus \
+    --objstore.config-file bucket_config.yaml \     # Bucket config file to send data to
+    --prometheus.url       http://localhost:9090 \  # Location of the Prometheus HTTP server
+    --http-address         0.0.0.0:19191 \          # HTTP endpoint for collecting metrics on the Sidecar
+    --grpc-address         0.0.0.0:19090            # GRPC endpoint for StoreAPI
+```
+
+* _[Example Kubernetes manifest](https://github.com/improbable-eng/thanos/tree/master/tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar.yaml)_
+* _[Example Kubernetes manifest with GCS upload](https://github.com/improbable-eng/thanos/tree/master/tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar-lts.yaml)_
+
+#### External Labels
+
+Prometheus allows the configuration of "external labels" of a given Prometheus instance. These are meant to globally identify the role of that instance. As Thanos aims to aggregate data across all instances, providing a consistent set of external labels becomes crucial!
+
+Every Prometheus instance must have a globally unique set of identifying labels. For example, in Prometheus's configuration file:
+
+```yaml
+global:
+  external_labels:
+    region: eu-west
+    monitor: infrastructure
+    replica: A
+```
+
+### [Query Gateway](components/query.md)
+
+Now that we have set up the Sidecar for one or more Prometheus instances, we want to use Thanos' global [Query Layer](components/query.md) to evaluate PromQL queries against all instances at once.
+
+The Query component is stateless and horizontally scalable and can be deployed with any number of replicas. Once connected to the Sidecars, it automatically detects which Prometheus servers need to be contacted for a given PromQL query.
+
+Query also implements Prometheus's official HTTP API and can thus be used with external tools such as Grafana. It also serves a derivative of Prometheus's UI for ad-hoc querying and checking store status.
+
+Below, we will set up a Query to connect to our Sidecars, and expose its HTTP UI.
+
+```bash
+thanos query \
+    --http-address 0.0.0.0:19192 \                                # HTTP Endpoint for Query UI
+    --store        1.2.3.4:19090 \                                # Static gRPC Store API Address for the query node to query
+    --store        1.2.3.5:19090 \                                # Also repeatable
+    --store        dnssrv+_grpc._tcp.thanos-store.monitoring.svc  # Supports DNS A & SRV records
+```
+
+Go to the configured HTTP address, which should now show a UI similar to that of Prometheus. If the cluster formed correctly you can now query across all Prometheus instances within the cluster.
You can also use the Stores page to check up on your stores.
+
+#### Deduplicating Data from Prometheus HA pairs
+
+The Query component is also capable of deduplicating data collected from Prometheus HA pairs. This requires configuring Prometheus's `global.external_labels` configuration block (as mentioned in the [External Labels section](getting-started.md#external-labels)) to identify the role of a given Prometheus instance.
+
+A typical choice is simply the label name "replica" while letting the value be whatever you wish. For example, you might set up the following in Prometheus's configuration file:
+
+```yaml
+global:
+  external_labels:
+    region: eu-west
+    monitor: infrastructure
+    replica: A
+# ...
+```
+
+In a Kubernetes stateful deployment, the replica label can also be the pod name.
+
+Reload your Prometheus instances, and then, in Query, we will define `replica` as the label on which we want deduplication to occur:
+
+```bash
+thanos query \
+    --http-address        0.0.0.0:19192 \
+    --store               1.2.3.4:19090 \
+    --store               1.2.3.5:19090 \
+    --query.replica-label replica          # Replica label for de-duplication
+```
+
+Go to the configured HTTP address, and you should now be able to query across all Prometheus instances and receive de-duplicated data.
+
+* _[Example Kubernetes manifest](https://github.com/improbable-eng/thanos/tree/master/tutorials/kubernetes-demo/manifests/thanos-querier.yaml)_
+
+#### Communication Between Components
+
+The only required communication between nodes is for Thanos Querier to be able to reach the gRPC StoreAPIs you provide. Thanos Querier periodically calls the Info endpoint to collect up-to-date metadata and to check the health of each StoreAPI.
+The metadata includes the information about time windows and external labels for each node.
+
+There are various ways to tell the query component about the StoreAPIs it should query data from. The simplest way is to use a static list of well-known addresses to query.
+These are repeatable, so you can add as many endpoints as needed. You can put a DNS domain prefixed by `dns+` or `dnssrv+` to have Thanos Query do an `A` or `SRV` lookup to get all required IPs to communicate with.
+
+```bash
+thanos query \
+    --http-address 0.0.0.0:19192 \              # Endpoint for Query UI
+    --grpc-address 0.0.0.0:19092 \              # gRPC endpoint for Store API
+    --store        1.2.3.4:19090 \              # Static gRPC Store API Address for the query node to query
+    --store        1.2.3.5:19090 \              # Also repeatable
+    --store        dns+rest.thanos.peers:19092  # Use DNS lookup for getting all registered IPs as separate StoreAPIs
+```
+
+Read more details [here](service-discovery.md).
+
+* _[Example Kubernetes manifest](https://github.com/improbable-eng/thanos/tree/master/tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar.yaml)_
+* _[Example Kubernetes manifest with GCS upload](https://github.com/improbable-eng/thanos/tree/master/tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar-lts.yaml)_
+
+### [Store Gateway](components/store.md)
+
+As the sidecar backs up data into the object storage of your choice, you can decrease Prometheus retention and store less locally. However, we need a way to query all that historical data again.
+The store gateway does just that by implementing the same gRPC data API as the sidecars but backing it with data it can find in your object storage bucket.
+Just like sidecars and query nodes, the store gateway exposes StoreAPI and needs to be discovered by Thanos Querier.
+
+```bash
+thanos store \
+    --data-dir             /var/thanos/store \   # Disk space for local caches
+    --objstore.config-file bucket_config.yaml \  # Bucket to fetch data from
+    --http-address         0.0.0.0:19191 \       # HTTP endpoint for collecting metrics on the Store Gateway
+    --grpc-address         0.0.0.0:19090         # GRPC endpoint for StoreAPI
+```
+
+The store gateway occupies small amounts of disk space for caching basic information about data in the object storage. This will rarely exceed more than a few gigabytes and is used to improve restart times. It is useful but not required to preserve it across restarts.
+
+* _[Example Kubernetes manifest](https://github.com/improbable-eng/thanos/tree/master/tutorials/kubernetes-demo/manifests/thanos-store-gateway.yaml)_
+
+### [Compactor](components/compact.md)
+
+A local Prometheus installation periodically compacts older data to improve query efficiency. Since the sidecar backs up data as soon as possible, we need a way to apply the same process to data in the object storage.
+
+The compactor component simply scans the object storage and processes compaction where required. At the same time it is responsible for creating downsampled copies of data to speed up queries.
+
+```bash
+thanos compact \
+    --data-dir             /var/thanos/compact \  # Temporary workspace for data processing
+    --objstore.config-file bucket_config.yaml \   # Bucket where to apply the compacting
+    --http-address         0.0.0.0:19191          # HTTP endpoint for collecting metrics on the Compactor
+```
+
+The compactor is not in the critical path of querying or data backup. It can either be run as a periodic batch job or be left running to always compact data as soon as possible. It is recommended to provide 100-300GB of local disk space for data processing.
+
+_NOTE: The compactor must be run as a **singleton** and must not run when manually modifying data in the bucket._
+
+### [Ruler](components/rule.md)
+
+In case Prometheus with a Thanos sidecar does not have enough retention, or if you want alerts or recording rules that require a global view, Thanos has just the component for that: the [Ruler](components/rule.md),
+which does rule and alert evaluation on top of a given Thanos Querier.
+
+### Receiver
+
+TBD
+
+## Extras
+
+Thanos also has a tutorial on deploying it to Kubernetes. We have a full page describing a standard deployment here.
+
+We also have example Grafana dashboards [here](https://github.com/improbable-eng/thanos/tree/master/examples/grafana/monitoring.md) and some [alerts](https://github.com/improbable-eng/thanos/tree/master/examples/alerts/alerts.md) to get you started.
diff --git a/docs/getting_started.md b/docs/getting_started.md
deleted file mode 100644
index dad1bdb553..0000000000
--- a/docs/getting_started.md
+++ /dev/null
@@ -1,290 +0,0 @@
-# Getting started
-
-Thanos provides a global query view, data backup, and historical data access as its core features in a single binary. All three features can be run independently of each other. This allows you to have a subset of Thanos features ready for immediate benefit or testing, while also making it flexible for gradual roll outs in more complex environments.
-
-In this quick-start guide, we will configure Thanos and all components mentioned to work against a Google Cloud Storage bucket.
-At the moment, Thanos is able to use [different storage providers](storage.md), with the ability to add more providers as necessary.
- -## Requirements - -* One or more [Prometheus](https://prometheus.io) v2.2.1+ installations -* golang 1.10+ -* An object storage bucket (optional) - -## Get Thanos! - -You can find the latest Thanos release [here](https://github.com/improbable-eng/thanos/releases). - - -If you want to build Thanos from source, make sure you have installed `bzr` and `git`. `bzr` is required, because `go` modules will use whatever VCS dependency use and in our case a single deps is using `bzr`. -And that you have a working installation of the Go [toolchain](https://github.com/golang/tools) (`GOPATH`, `PATH=${GOPATH}/bin:${PATH}`), Thanos can be downloaded and built by running: - -``` -go get -d github.com/improbable-eng/thanos/... -cd ${GOPATH}/src/github.com/improbable-eng/thanos -make -``` - -The `thanos` binary should now be in your `$PATH` and is the only thing required to deploy any of its components. - -You may meet below error: -``` -go: verifying github.com/grpc-ecosystem/go-grpc-middleware@v1.0.0: checksum mismatch - downloaded: h1:BWIsLfhgKhV5g/oF34aRjniBHLTZe5DNekSjbAjIS6c= - go.sum: h1:Iju5GlWwrvL6UBg4zJJt3btmonfrMlCDdsejg4CZE7c= -Makefile:183: recipe for target 'go-mod-tidy' failed -``` - -If your `golang` version is `1.11.4`, you can run following cmd then `make` would pass: -``` -go clean -modcache -``` - -If your `golang` version is below `1.11.4`, highly recommend you upgrade to `1.11.4` or above. - -## [Prometheus](https://prometheus.io/) - -Thanos bases on vanilla Prometheus (v2.2.1+). - -For exact Prometheus version list Thanos was tested against you can find [here](../Makefile#L42) - -## [Sidecar](components/sidecar.md) - -Thanos integrates with existing Prometheus servers through a [Sidecar process](https://docs.microsoft.com/en-us/azure/architecture/patterns/sidecar#solution), which runs on the same machine or in the same pod as the Prometheus server. - -The purpose of the Sidecar is to backup Prometheus data into an Object Storage bucket, and giving other Thanos components access to the Prometheus instance the Sidecar is attached to. - -[More details about the Sidecar's functions are available at the sidecar documentation page](components/sidecar.md). - -NOTE: If you want to use `reload.*` flags for sidecar, make sure you enable `reload` Prometheus endpoint with flag `--web.enable-lifecycle` - -### Backups - -The following configures the sidecar to only backup Prometheus data into a chosen object storage: - -``` -thanos sidecar \ - --tsdb.path /var/prometheus \ # TSDB data directory of Prometheus - --prometheus.url "http://localhost:9090" \ - --objstore.config-file bucket_config.yaml \ # Bucket to upload data to -``` - -The format of YAML file depends on provider you choose. Examples of config and up-to-date list of storage types Thanos supports is available [here](storage.md). - -Rolling this out has little to zero impact on the running Prometheus instance. It is a good start to ensure you are backing up your data while figuring out the other pieces of Thanos. - -If you are not interested in backing up any data, the `--objstore.config-file` flag can simply be omitted. 
- -* _[Example Kubernetes manifest](../tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar.yaml)_ -* _[Example Kubernetes manifest with Minio upload](../tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar-lts.yaml)_ -* _[Details & Config for other object stores](./storage.md)_ - -### [Store API](/pkg/store/storepb/rpc.proto#L19) - -The Sidecar component implements and exposes gRPC _[Store API](/pkg/store/storepb/rpc.proto#L19)_. The Store API allows you to query metric data in Prometheus and data backed up into the Object Store bucket. - -Let's extend the Sidecar in the previous section to connect to a Prometheus server, and expose the Store API. - -``` -thanos sidecar \ - --tsdb.path /var/prometheus \ - --objstore.config-file bucket_config.yaml \ # Bucket config file to send data to - --prometheus.url http://localhost:9090 \ # Location of the Prometheus HTTP server - --http-address 0.0.0.0:19191 \ # HTTP endpoint for collecting metrics on the Sidecar - --grpc-address 0.0.0.0:19090 # GRPC endpoint for StoreAPI -``` - -* _[Example Kubernetes manifest](../tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar.yaml)_ -* _[Example Kubernetes manifest with GCS upload](../tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar-lts.yaml)_ - -### External Labels - -Prometheus allows the configuration of "external labels" of a given Prometheus instance. These are meant to globally identify the role of that instance. As Thanos aims to aggregate data across all instances, providing a consistent set of external labels becomes crucial! - -Every Prometheus instance must have a globally unique set of identifying labels. For example, in Prometheus's configuration file: - -``` -global: - external_labels: - region: eu-west - monitor: infrastructure - replica: A -# ... -``` - -## [Query Layer](components/query.md) - -Now that we have setup the Sidecar for one or more Prometheus instances, we want to use Thanos' global [Query Layer](components/query.md) to evaluate PromQL queries against all instances at once. - -The Query component is stateless and horizontally scalable and can be deployed with any number of replicas. Once connected to the Sidecars, it automatically detects which Prometheus servers need to be contacted for a given PromQL query. - -Query also implements Prometheus's offical HTTP API and can thus be used with external tools such as Grafana. It also serves a derivative of Prometheus's UI for ad-hoc querying. - -Below, we will set up a Query to connect to our Sidecars, and expose its HTTP UI. - -``` -thanos query \ - --http-address 0.0.0.0:19192 \ # HTTP Endpoint for Query UI - --store 1.2.3.4:19090 \ # Static gRPC Store API Address for the query node to query - --store 1.2.3.5:19090 # Also repeatable -``` - -Go to the configured HTTP address that should now show a UI similar to that of Prometheus. If the cluster formed correctly you can now query across all Prometheus instances within the cluster. - -### Deduplicating Data from Prometheus HA pairs - -The Query component is also capable of deduplicating data collected from Prometheus HA pairs. This requires configuring Prometheus's `global.external_labels` configuration block (as mentioned in the [External Labels section](#external-labels)) to identify the role of a given Prometheus instance. - -A typical choice is simply the label name "replica" while letting the value be whatever you wish. 
For example, you might set up the following in Prometheus's configuration file: - -``` -global: - external_labels: - region: eu-west - monitor: infrastructure - replica: A -# ... -``` - -Reload your Prometheus instances, and then, in Query, we will enable `replica` as the label we want to enable deduplication to occur on: - -``` -thanos query \ - --http-address 0.0.0.0:19192 \ - --store 1.2.3.4:19090 \ - --store 1.2.3.5:19090 \ - --query.replica-label replica # Replica label for de-duplication -``` - -Go to the configured HTTP address, and you should now be able to query across all Prometheus instances and receive de-duplicated data. - -* _[Example Kubernetes manifest](../tutorials/kubernetes-demo/manifests/thanos-querier.yaml)_ - -## Communication Between Components - -The only required communication between nodes is for Thanos Querier to be able to reach gRPC storeAPIs you provide. Thanos Querier periodically calls Info endpoint to collect up-to-date metadata as well as checking the health of given StoreAPI. -The metadata includes the information about time windows and external labels for each node. - -There are various ways to tell query component about the StoreAPIs it should query data from. The simplest way is to use a static list of well known addresses to query. -These are repeatable so can add as many endpoint as needed. You can put DNS domain prefixed by `dns://` or `dns+srv://` to have Thanos Query do an `A` or `SRV` lookup to get all required IPs to communicate with. - -``` -thanos query \ - --http-address 0.0.0.0:19192 \ # Endpoint for Query UI - --grpc-address 0.0.0.0:19092 \ # gRPC endpoint for Store API - --store 1.2.3.4:19090 \ # Static gRPC Store API Address for the query node to query - --store 1.2.3.5:19090 \ # Also repeatable - --store dns://rest.thanos.peers:19092 # Use DNS lookup for getting all registered IPs as separate StoreAPIs -``` - -Read more details [here](/docs/service_discovery.md) - -### Deprecated: Gossip configuration. - -Given a sidecar we can have it join a gossip cluster by advertising itself to other peers within the network. - -NOTE: Gossip will be removed. See [here](/docs/proposals/approved/201809_gossip-removal.md) why. New FileSD with DNS support is enabled and described [here](/docs/service_discovery.md) - -``` -thanos sidecar \ - --prometheus.url http://localhost:9090 \ - --tsdb.path /var/prometheus \ - --objstore.config-file bucket_config.yaml \ # Bucket config file to send data to - --grpc-address 0.0.0.0:19091 \ # gRPC endpoint for Store API (will be used to perform PromQL queries) - --http-address 0.0.0.0:19191 \ # HTTP endpoint for collecting metrics on Thanos sidecar - --cluster.address 0.0.0.0:19391 \ # Endpoint used to meta data about the current node - --cluster.advertise-address 127.0.0.1:19391 \ # Location at which the node advertise itself at to other members of the cluster - --cluster.peers 127.0.0.1:19391 # Static cluster peer where the node will get info about the cluster (repeatable) -``` - -With the above configuration a single node will advertise itself in the cluster and query for other members of the cluster (from itself) when you add more sidecars / components you will probably want to sent `cluster.peers` to a well known peer that will allow you to discover other peers within the cluster. - -When a peer advertises itself / joins a gossip cluster it sends information about all the peers it currently knows about (including itself). 
This information for each peer allows you to see what type of component a peer is (Source, Store, Query), the peers Store API address (used for querying) and meta data about the external labels and time window the peer holds information about. - -Once the Peer joins the cluster it will periodically update the information it sends out with new / updated information about other peers and the time windows for the metrics that it can access. - -``` -thanos query \ - --http-address 0.0.0.0:19192 \ # Endpoint for Query UI - --grpc-address 0.0.0.0:19092 \ # gRPC endpoint for Store API - --cluster.address 0.0.0.0:19591 \ - --cluster.advertise-address 127.0.0.1:19591 \ - --cluster.peers 127.0.0.1:19391 # Static cluster peer where the node will get info about the cluster -``` - -You can mix both static `store` and `cluster` based approaches: - -``` -thanos query \ - --http-address 0.0.0.0:19192 \ # Endpoint for Query UI - --grpc-address 0.0.0.0:19092 \ # gRPC endpoint for Store API - --cluster.address 0.0.0.0:19591 \ - --cluster.advertise-address 127.0.0.1:19591 \ - --cluster.peers 127.0.0.1:19391 \ # Static cluster peer where the node will get info about the cluster - --cluster.peers 127.0.0.1:19392 \ # Another cluster peer (many can be added to discover nodes) - --store 1.2.3.4:19090 \ # Static gRPC Store API Address for the query node to query - --store 1.2.3.5:19090 \ # Also repeatable - --store dns://rest.thanos.peers:19092 # Use DNS lookup for getting all registered IPs as separate StoreAPIs -``` - -When to use gossip vs store flags? -- Use gossip if you want to maintain single gossip cluster that is able to dynamically join and remove components. -- Use static store when you want to have full control of which components are connected. It is also easier to user static store options when setting up communication with remote (cross-cluster) components e.g (sidecar in different network through some proxy) - -Configuration of initial peers is flexible and the argument can be repeated for Thanos to try different approaches. -Additional flags for cluster configuration exist but are typically not needed. Check the `--help` output for further information. - -* _[Example Kubernetes manifest](../tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar.yaml)_ -* _[Example Kubernetes manifest with GCS upload](../tutorials/kubernetes-demo/manifests/prometheus-ha-sidecar-lts.yaml)_ - -## [Store Gateway](components/store.md) - -As the sidecar backs up data into the object storage of your choice, you can decrease Prometheus retention and store less locally. However we need a way to query all that historical data again. -The store gateway does just that by implementing the same gRPC data API as the sidecars but backing it with data it can find in your object storage bucket. -Just like sidecars and query nodes, the store gateway exposes StoreAPI and needs to be discovered by Thanos Querier. - -``` -thanos store \ - --data-dir /var/thanos/store \ # Disk space for local caches - --objstore.config-file bucket_config.yaml \ # Bucket to fetch data from - --http-address 0.0.0.0:19191 \ # HTTP endpoint for collecting metrics on the Store Gateway - --grpc-address 0.0.0.0:19090 # GRPC endpoint for StoreAPI -``` - -The store gateway occupies small amounts of disk space for caching basic information about data in the object storage. This will rarely exceed more than a few gigabytes and is used to improve restart times. It is useful but not required to preserve it across restarts. 
- -* _[Example Kubernetes manifest](../tutorials/kubernetes-demo/manifests/thanos-store-gateway.yaml)_ - -## [Compactor](components/compact.md) - -A local Prometheus installation periodically compacts older data to improve query efficiency. Since the sidecar backs up data as soon as possible, we need a way to apply the same process to data in the object storage. - -The compactor component simply scans the object storage and processes compaction where required. At the same time, it is responsible for creating downsampled copies of data to speed up queries. - -``` -thanos compact \ - --data-dir /var/thanos/compact \ # Temporary workspace for data processing - --objstore.config-file bucket_config.yaml \ # Bucket to apply the compaction to - --http-address 0.0.0.0:19191 # HTTP endpoint for collecting metrics on the Compactor -``` - -The compactor is not in the critical path of querying or data backup. It can either be run as a periodic batch job or be left running to always compact data as soon as possible. It is recommended to provide 100-300GB of local disk space for data processing. - -_NOTE: The compactor must be run as a **singleton** and must not run when manually modifying data in the bucket._ - -## [Ruler](components/rule.md) - -In case Prometheus with the Thanos sidecar does not have enough retention, or if you want alerts or recording rules that require a global view, Thanos offers [Ruler](components/rule.md), -which does rule and alert evaluation on top of a given Thanos Querier. - -# All-in-one example - -You can find Kubernetes manifests [here](../tutorials/kubernetes-demo/manifests). - -# Dashboards - -You can find example Grafana dashboards [here](../examples/grafana/monitoring.md) - -# Alerts - -You can find example Alert configuration [here](../examples/alerts/alerts.md) diff --git a/docs/proposals/approved/201809_gossip-removal.md b/docs/proposals/approved/201809_gossip-removal.md index e19905c1bd..2c3b4de4d1 100644 --- a/docs/proposals/approved/201809_gossip-removal.md +++ b/docs/proposals/approved/201809_gossip-removal.md @@ -1,11 +1,12 @@ -# Deprecated gossip clustering in favor of File SD - -Status: draft | in-review | rejected | **accepted** | complete - -Implementation Owner: [@bplotka](https://github.com/bwplotka) - -Ticket: https://github.com/improbable-eng/thanos/issues/484 - +--- +title: Deprecated gossip clustering in favor of File SD +type: proposal +menu: proposals +status: accepted +owner: bwplotka +--- + +### Ticket: https://github.com/improbable-eng/thanos/issues/484 ## Summary It is becoming clear that we need to remove gossip protocol as our main way of communication between Thanos Querier and @@ -56,7 +57,7 @@ are too.. static. (: We need [File SD](https://github.com/improbable-eng/thanos/ * Add File Service Discovery (SD): https://github.com/improbable-eng/thanos/issues/492 * Remove gossip from the documentation, be clear what talks with what (!) * Deprecate gossip in code. -* Remove gossip code and flags AFTER [File SD](https://github.com/improbable-eng/thanos/issues/492 is done and stable. +* Remove gossip code and flags AFTER [File SD](https://github.com/improbable-eng/thanos/issues/492) is done and stable. 
### Backwards compatibility diff --git a/docs/proposals/approved/201812_thanos-remote-receive.md b/docs/proposals/approved/201812_thanos-remote-receive.md index 751785be49..4bf58557fb 100644 --- a/docs/proposals/approved/201812_thanos-remote-receive.md +++ b/docs/proposals/approved/201812_thanos-remote-receive.md @@ -1,8 +1,10 @@ -## Thanos Remote Write - -Status: draft | in-review | rejected | *accepted* | complete - -Implementation Owner: @brancz +--- +title: Thanos Remote Write +type: proposal +menu: proposals +status: accepted +owner: brancz +--- ## Summary diff --git a/docs/proposals/approved/201901-read-write-operations-bucket.md b/docs/proposals/approved/201901-read-write-operations-bucket.md index 811f347dcd..03871ea69a 100644 --- a/docs/proposals/approved/201901-read-write-operations-bucket.md +++ b/docs/proposals/approved/201901-read-write-operations-bucket.md @@ -1,10 +1,13 @@ -# Read-Write coordination free operational contract for object storage +--- +title: Read-Write coordination free operational contract for object storage +type: proposal +menu: proposals +status: accepted +owner: bwplotka +--- -Status: draft | in-review | rejected | **accepted** | complete +### Tickets: -Implementation Owner: [@bwplotka](https://github.com/bwplotka) - -Tickets: * https://github.com/improbable-eng/thanos/issues/298 (eventual consistency) * https://github.com/improbable-eng/thanos/issues/377 (eventual consistency & partial upload) * https://github.com/improbable-eng/thanos/issues/564 (retention vs store gateway) diff --git a/docs/proposals/rejected/201807_store_instance_high_availability.md b/docs/proposals/rejected/201807_store_instance_high_availability.md index ad01d35f81..0011f6765e 100644 --- a/docs/proposals/rejected/201807_store_instance_high_availability.md +++ b/docs/proposals/rejected/201807_store_instance_high_availability.md @@ -1,11 +1,12 @@ -# High-availability for store instances - -Status: draft | in-review | **rejected** | accepted | complete - -Proposal author: [@mattbostock](https://github.com/mattbostock) -Implementation owner: [@mattbostock](https://github.com/mattbostock) - -## Status: Rejected +--- +title: High-availability for store instances +type: proposal +menu: proposals +status: rejected +owner: mattbostock +--- + +## Summary This proposal makes total sense and solves our goals when using gossip. However, there exists a very easy solution to this problem in the form of using just a static entry with any load balancer, like a Kubernetes Service, to load balance diff --git a/docs/proposals/rejected/config.md b/docs/proposals/rejected/config.md index 5396a24a14..7b2175d199 100644 --- a/docs/proposals/rejected/config.md +++ b/docs/proposals/rejected/config.md @@ -1,8 +1,10 @@ -# Thanos Cluster Configuration - -Status: draft | in-review | **rejected** | accepted | complete - -Implementation Owner: [@domgreen](https://github.com/domgreen) +--- +title: Thanos Cluster Configuration +type: proposal +menu: proposals +status: rejected +owner: domgreen +--- ## Summary diff --git a/docs/release_process.md b/docs/release-process.md similarity index 70% rename from docs/release_process.md rename to docs/release-process.md index 1fdb0f4f1f..3ba2b29878 100644 --- a/docs/release_process.md +++ b/docs/release-process.md @@ -1,27 +1,47 @@ # Releases -This page describes the release process for Thanos project. +This page describes the release cadence and process for the Thanos project. 
NOTE: As [Semantic Versioning](http://semver.org/spec/v2.0.0.html) states, all 0.y.z releases can contain breaking changes in the API (flags, gRPC API, any backward compatibility) ## Cadence -We aim for *at least* 1 release per 6 weeks. However, no strict dates are planned. +We aim for a regular and strict cadence of one release per 6 weeks, counted from the first release candidate of one release to the first release candidate of the next. +This means that there is no *code freeze* or anything like that. We plan to stick to the exact 6 weeks, so there is no rush +to squeeze changes into a given release (except bug fixes). -No release candidates are required until major version. +No feature should block a release. -Additionally we aim for `master` branch being stable. +Additionally we (obviously) aim for the `master` branch being stable. -## Cutting individual release +## For maintainers: Cutting individual release -Process of cutting a new *minor* Thanos release: +We will choose a release shepherd for each minor release. + +Release shepherd responsibilities: +* Perform releases (from first RC to actual release). +* Announce all releases on all communication channels. +Process of releasing a *minor* Thanos version: +1. Release `v<major>.<minor>.0-rc.0` +1. If after 3 work days there is no major bug, release `v<major>.<minor>.0` +1. If within 3 work days there is a major bug, let's triage and fix it, then release `v<major>.<minor>.0-rc.<n+1>`. Go to step 2. +1. Do a patch release if needed for any bugs afterwards. Use the same `release-xxx` branch. + +### How to release "a version" 1. Add a PR on branch `release-<major>.<minor>` that will start the minor release branch and prepare changes to cut the release. + + For release candidates just reuse the same branch and rebase it on every candidate until the actual release happens. + 1. Bump [VERSION file](/VERSION) 1. Update [CHANGELOG file](/CHANGELOG.md) Note that `CHANGELOG.md` should only document changes relevant to users of Thanos, including external API changes, performance improvements, and new features. Do not document changes of internal interfaces, code refactorings and clean-ups, changes to the build process, etc. People interested in these are asked to refer to the git history. Format is described in `CHANGELOG.md`. + + The whole release, from release candidate `rc.0` to the actual release, should have exactly the same section. We don't separate + what has changed between release candidates. 1. Double check backward compatibility: 1. *In case of version after `v1+.y.z`*, double check if none of the changes break API compatibility. This should be done in the PR review process, but a double check is good to have. diff --git a/docs/service_discovery.md b/docs/service-discovery.md similarity index 97% rename from docs/service_discovery.md rename to docs/service-discovery.md index ef6c694ef6..80fd1118cb 100644 --- a/docs/service_discovery.md +++ b/docs/service-discovery.md @@ -1,4 +1,11 @@ -# Thanos Service Discovery +--- +title: Service Discovery +type: docs +menu: thanos +slug: /service-discovery.md +--- + +# Service Discovery Service discovery has a vital place in Thanos components. It allows Thanos to discover different sets of API targets required to perform certain operations. This logic is meant to replace Gossip, which [is planned to be removed.](/docs/proposals/approved/201809_gossip-removal.md) @@ -38,6 +45,7 @@ The format of the configuration file is the same as the one used in [Prometheus' Both YAML and JSON files can be used. The format of the files is this: * JSON: + ```json [ { 
The format of the files is this: ``` * YAML: + ```yaml - targets: ['localhost:9090', 'example.org:443'] ``` diff --git a/docs/storage.md b/docs/storage.md index 96582fe962..8cd58a3dc7 100644 --- a/docs/storage.md +++ b/docs/storage.md @@ -1,6 +1,13 @@ +--- +title: Object Storage +type: docs +menu: thanos +slug: /storage.md +--- + # Object Storage -Thanos supports any object stores that can be implemented against Thanos [objstore.Bucket interface](/pkg/objstore/objstore.go) +Thanos supports any object store that can be implemented against the Thanos [objstore.Bucket interface](https://github.com/improbable-eng/thanos/pkg/objstore/objstore.go) All clients are configured using `--objstore.config-file` to reference the configuration file, or `--objstore.config` to put the YAML config directly. @@ -21,12 +28,12 @@ NOTE: Currently Thanos requires strong consistency (write-read) for object store ## How to add a new client? 1. Create a new directory under `pkg/objstore/` -2. Implement [objstore.Bucket interface](/pkg/objstore/objstore.go) +2. Implement [objstore.Bucket interface](https://github.com/improbable-eng/thanos/pkg/objstore/objstore.go) 3. Add a `NewTestBucket` constructor for testing purposes that creates and deletes a temporary bucket. -4. Use created `NewTestBucket` in [ForeachStore method](/pkg/objstore/objtesting/foreach.go) to ensure we can run tests against new provider. (In PR) -5. RUN the [TestObjStoreAcceptanceTest](/pkg/objstore/objtesting/acceptance_e2e_test.go) against your provider to ensure it fits. Fix any found error until test passes. (In PR) -6. Add client implementation to the factory in [factory](/pkg/objstore/client/factory.go) code. (Using as small amount of flags as possible in every command) -7. Add client struct config to [bucketcfggen](/scripts/bucketcfggen/main.go) to allow config auto generation. +4. Use the created `NewTestBucket` in the [ForeachStore method](https://github.com/improbable-eng/thanos/pkg/objstore/objtesting/foreach.go) to ensure we can run tests against the new provider. (In PR) +5. RUN the [TestObjStoreAcceptanceTest](https://github.com/improbable-eng/thanos/pkg/objstore/objtesting/acceptance_e2e_test.go) against your provider to ensure it fits. Fix any errors found until the test passes. (In PR) +6. Add the client implementation to the factory in [factory](https://github.com/improbable-eng/thanos/pkg/objstore/client/factory.go) code. (Using as few flags as possible in every command) +7. Add the client struct config to [bucketcfggen](https://github.com/improbable-eng/thanos/scripts/bucketcfggen/main.go) to allow config auto-generation. At that point, anyone can use your provider via the config spec @@ -42,6 +49,7 @@ type: S3 config: bucket: "" endpoint: "" + region: "" access_key: "" insecure: false signature_version2: false @@ -55,7 +63,7 @@ config: enable: false ``` -AWS region to endpoint mapping can be found in this [link](https://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region) +The attribute `region` is optional. AWS region to endpoint mapping can be found in this [link](https://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region). Make sure you use a correct signature version. 
Currently AWS requires signature v4, so it needs `signature-version2: false`; otherwise you will get an Access Denied error. However, several other S3-compatible stores use `signature-version2: true` @@ -201,14 +209,28 @@ config: ### GCS Policies +__Note:__ GCS Policies should be applied at the project level, not at the bucket level. + For deployment: -`Storage Object Creator` and ` Storage Object Viewer` +`Storage Object Creator` and `Storage Object Viewer` For testing: `Storage Object Admin` for the ability to create and delete temporary buckets. +To test that the policy is working as expected, exec into the sidecar container, e.g.: + +```sh +kubectl exec -it -n <namespace> <pod> -c <container> -- /bin/sh +``` + +Then test that you can at least list objects in the bucket, e.g.: + +```sh +thanos bucket ls --objstore.config="${OBJSTORE_CONFIG}" +``` + ## Azure Configuration To use Azure Storage as a Thanos object store, you need to pre-create a storage account from the Azure portal or using the Azure CLI. Follow the instructions from Azure Storage Documentation: [https://docs.microsoft.com/en-us/azure/storage/common/storage-quickstart-create-account](https://docs.microsoft.com/en-us/azure/storage/common/storage-quickstart-create-account?tabs=portal) @@ -224,6 +246,7 @@ config: storage_account: "" storage_account_key: "" container: "" + endpoint: "" ``` ### OpenStack Swift Configuration diff --git a/go.mod b/go.mod index b0e865d101..044c0d0441 100644 --- a/go.mod +++ b/go.mod @@ -17,11 +17,12 @@ require ( github.com/grpc-ecosystem/go-grpc-middleware v1.0.0 github.com/grpc-ecosystem/go-grpc-prometheus v0.0.0-20181025070259-68e3a13e4117 github.com/hashicorp/go-sockaddr v0.0.0-20180320115054-6d291a969b86 - github.com/hashicorp/golang-lru v0.5.0 + github.com/hashicorp/go-version v1.1.0 + github.com/hashicorp/golang-lru v0.5.1 github.com/hashicorp/memberlist v0.1.0 github.com/julienschmidt/httprouter v1.1.0 // indirect github.com/lovoo/gcloud-opentracing v0.3.0 - github.com/miekg/dns v1.0.8 // indirect + github.com/miekg/dns v1.1.4 github.com/minio/minio-go v0.0.0-20190131015406-c8a261de75c1 github.com/mozillazg/go-cos v0.11.0 github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223 @@ -34,13 +35,13 @@ require ( github.com/pkg/errors v0.8.1 github.com/prometheus/client_golang v0.9.2 github.com/prometheus/common v0.0.0-20181218105931-67670fe90761 - github.com/prometheus/prometheus v0.0.0-20190118110214-3bd41cc92c78 - github.com/prometheus/tsdb v0.4.0 + github.com/prometheus/prometheus v0.0.0-20190328180107-4d60eb36dcbe + github.com/prometheus/tsdb v0.6.1 go.opencensus.io v0.19.0 // indirect golang.org/x/net v0.0.0-20190213061140-3a22650c66bd golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890 golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4 - golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223 // indirect + golang.org/x/sys v0.0.0-20190322080309-f49334f85ddc // indirect golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2 google.golang.org/api v0.1.0 google.golang.org/appengine v1.4.0 // indirect diff --git a/go.sum b/go.sum index 237900b401..ca3bbf0664 100644 --- a/go.sum +++ b/go.sum @@ -1,18 +1,15 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0 h1:eOI3/cP2VTU6uZLDYAoic+eyzzB9YyGmJ7eIjl8rOPg= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +contrib.go.opencensus.io/exporter/ocagent v0.4.1/go.mod h1:b6YwD5Q3Yvj4yk0CDK5vGXexygNzI09aXUdDEakQBgA= git.apache.org/thrift.git v0.0.0-20180902110319-2566ecd5d999/go.mod 
h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg= git.apache.org/thrift.git v0.0.0-20181218151757-9b75e4fe745a/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg= github.com/Azure/azure-pipeline-go v0.1.8 h1:KmVRa8oFMaargVesEuuEoiLCQ4zCCwQ8QX/xg++KS20= github.com/Azure/azure-pipeline-go v0.1.8/go.mod h1:XA1kFWRVhSK+KNFiOhfv83Fv8L9achrP7OxIzeTn1Yg= -github.com/Azure/azure-sdk-for-go v0.0.0-20161028183111-bd73d950fa44 h1:L4fLiifszjLnCRGi6Xhp0MgUwjIMbVXKbayoRiVxkU8= -github.com/Azure/azure-sdk-for-go v0.0.0-20161028183111-bd73d950fa44/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= +github.com/Azure/azure-sdk-for-go v23.2.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= github.com/Azure/azure-storage-blob-go v0.0.0-20181022225951-5152f14ace1c h1:Y5ueznoCekgCWBytF1Q9lTpZ3tJeX37dQtCcGjMCLYI= github.com/Azure/azure-storage-blob-go v0.0.0-20181022225951-5152f14ace1c/go.mod h1:oGfmITT1V6x//CswqY2gtAHND+xIP64/qL7a5QJix0Y= -github.com/Azure/go-autorest v10.8.1+incompatible h1:u0jVQf+a6k6x8A+sT60l6EY9XZu+kHdnZVPAYqpVRo0= -github.com/Azure/go-autorest v10.8.1+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= -github.com/NYTimes/gziphandler v1.0.1 h1:iLrQrdwjDd52kHDA5op2UBJFjmOb9g+7scBan4RN8F0= -github.com/NYTimes/gziphandler v1.0.1/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= +github.com/Azure/go-autorest v11.2.8+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I= github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c= github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= @@ -25,12 +22,12 @@ github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf h1:qet1QNfXsQxTZq github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da h1:8GUt8eRujhVEGZFFEjBj46YV4rDjvGrNxb0KMWYkL2I= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= -github.com/aws/aws-sdk-go v0.0.0-20180507225419-00862f899353 h1:qFKf58XUUvHaEz0zFkLJsQ4dzoAyrQ8QyhK4nHGHBI4= github.com/aws/aws-sdk-go v0.0.0-20180507225419-00862f899353/go.mod h1:ZRmQr0FajVIyZ4ZzBYKG5P3ZqPz9IHG41ZoMu1ADI3k= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973 h1:xJ4a3vCFaGF/jqvzLMYoU8P317H5OQ+Via4RmuPwCS0= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/biogo/store v0.0.0-20160505134755-913427a1d5e8/go.mod h1:Iev9Q3MErcn+w3UOJD/DkEzllvugfdx7bGcMOFhvr/4= github.com/cenk/backoff v2.0.0+incompatible/go.mod h1:7FtoeaSnHoZnmZzz47cM35Y9nSW7tNyaidugnHTaFDE= +github.com/census-instrumentation/opencensus-proto v0.1.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/certifi/gocertifi v0.0.0-20180905225744-ee1a9a0726d2/go.mod h1:GJKEexRPVJrBSOjoqN5VNOIKJ5Q3RViH6eu3puDRwx4= github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= @@ -44,13 +41,11 @@ github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dgrijalva/jwt-go v0.0.0-20161101193935-9ed569b5d1ac h1:xrQJVwQCGqDvOO7/0+RyIq5J2M3Q4ZF7Ug/BMQtML1E= github.com/dgrijalva/jwt-go v0.0.0-20161101193935-9ed569b5d1ac/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/elastic/gosigar v0.9.0/go.mod h1:cdorVVzy1fhmEqmtgqkoE3bYtCfSCkVyjTyCIo22xvs= github.com/elazarl/go-bindata-assetfs v1.0.0/go.mod h1:v+YaWX3bdea5J/mo8dSETolEo7R71Vk1u8bnjau5yw4= -github.com/evanphx/json-patch v4.1.0+incompatible h1:K1MDoo4AZ4wU0GIU/fPmtZg7VpzLjCxu+UwBD1FvwOc= github.com/evanphx/json-patch v4.1.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a/go.mod h1:7Ga40egUymuWXxAe151lTNnCv97MddSOVsjpPPkityA= github.com/fatih/structtag v1.0.0 h1:pTHj65+u3RKWYPSGaU290FpI/dXxTaHdVwVwbcPKmEc= @@ -61,12 +56,13 @@ github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/getsentry/raven-go v0.1.0/go.mod h1:KungGk8q33+aIAZUIVWZDr2OfAEBsO49PX4NzFV5kcQ= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/go-ini/ini v1.21.1 h1:+QXUYsI7Tfxc64oD6R5BxU/Aq+UwGkyjH4W/hMNG7bg= github.com/go-ini/ini v1.21.1/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= github.com/go-kit/kit v0.8.0 h1:Wz+5lgoB0kkuqLEc6NVmwRknTKP6dTGbSqvhZtBI/j0= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-logfmt/logfmt v0.3.0 h1:8HUsc87TaSWLKwrnumgC8/YconD2fJQsRJAsWaPg2ic= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= +github.com/go-logfmt/logfmt v0.4.0 h1:MP4Eh7ZCb31lleYCFuwm0oe4/YGak+5l1vA2NOE80nA= +github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-ole/go-ole v1.2.1/go.mod h1:7FAglXiTm7HKlQRDeOQ6ZNUHidzCWXuZWq/1dTyBNF8= github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-stack/stack v1.8.0 h1:5SgMzNM5HxrEjV0ww2lTmX6E2Izsfxas4+YHWRs3Lsk= @@ -76,7 +72,6 @@ github.com/gogo/protobuf v1.2.0 h1:xU6/SpYbvkNYiptHJYEDRseDLvYE7wSqhYYNy0QSUzI= github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/groupcache v0.0.0-20180924190550-6f2cf27854a4 h1:6UVLWz0fIIrv0UVj6t0A7cL48n8IyAdLVQqAYzEfsKI= github.com/golang/groupcache v0.0.0-20180924190550-6f2cf27854a4/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:tluoj9z5200jBnyusfRPU2LqT6J+DAorxEvtC7LHB+E= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= @@ -86,27 +81,23 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y github.com/golang/snappy v0.0.0-20160529050041-d9eb7a3d35ec/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db h1:woRePGFeVFfLKN/pOkfl+p/TAqKOfFu+7KPlMVpok/w= 
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/google/btree v0.0.0-20180124185431-e89373fe6b4a h1:ZJu5NB1Bk5ms4vw0Xu4i+jD32SE9jQXyfnOvwhHqlT0= github.com/google/btree v0.0.0-20180124185431-e89373fe6b4a/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= -github.com/google/gofuzz v0.0.0-20150304233714-bbcb9da2d746 h1:M6d2zDTA4cKXT6OwFsJxlo5tWrAukj3KfvJ1zcBatnA= github.com/google/gofuzz v0.0.0-20150304233714-bbcb9da2d746/go.mod h1:HP5RmnzzSNb993RKQDq4+1A4ia9nllfqcQFTQJedwGI= github.com/google/martian v2.1.0+incompatible h1:/CP5g8u/VJHijgedC/Legn3BAbAaWPgecwXBIDzw5no= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/pprof v0.0.0-20180605153948-8b03ce837f34/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/googleapis/gax-go v2.0.2+incompatible h1:silFMLAnr330+NRuag/VjIGF7TLp/LBrV2CJKFLWEww= github.com/googleapis/gax-go v2.0.2+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk2eWhX2bMyUtNHzFKcPA9HY= -github.com/googleapis/gnostic v0.0.0-20180520015035-48a0ecefe2e4 h1:yxHFSapGMUoyn+3v6LiJJxoJhvbDqIq8me0gAWehnSU= github.com/googleapis/gnostic v0.0.0-20180520015035-48a0ecefe2e4/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY= github.com/gophercloud/gophercloud v0.0.0-20181206160319-9d88c34913a9 h1:7TRGugCPfA2Mll6QT7cbhD1GXZwk7+1PUz8tYrOWXgQ= github.com/gophercloud/gophercloud v0.0.0-20181206160319-9d88c34913a9/go.mod h1:3WdhXV3rUYy9p6AUW8d94kr+HS62Y4VL9mBnFxsD8q4= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gopherjs/gopherjs v0.0.0-20181103185306-d547d1d9531e h1:JKmoR8x90Iww1ks85zJ1lfDGgIiMDuIptTOhJq+zKyg= github.com/gopherjs/gopherjs v0.0.0-20181103185306-d547d1d9531e/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= -github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 h1:pdN6V1QBWetyv/0+wjACpqVH+eVULgEjkurDLq3goeM= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/grpc-ecosystem/go-grpc-middleware v1.0.0 h1:Iju5GlWwrvL6UBg4zJJt3btmonfrMlCDdsejg4CZE7c= github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= @@ -120,7 +111,6 @@ github.com/hashicorp/consul v0.0.0-20180615161029-bed22a81e9fd h1:auIpcMc3+//R94 github.com/hashicorp/consul v0.0.0-20180615161029-bed22a81e9fd/go.mod h1:mFrjN1mfidgJfYP1xrJCF+AfRhr6Eaqhb2+sfyn/OOI= github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= -github.com/hashicorp/go-cleanhttp v0.0.0-20160407174126-ad28ea4487f0 h1:2l0haPDqCzZEO160UR5DSrrl8RWptFCoxFsSbRLJBaI= github.com/hashicorp/go-cleanhttp v0.0.0-20160407174126-ad28ea4487f0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= @@ -128,28 +118,28 @@ 
github.com/hashicorp/go-msgpack v0.0.0-20150518234257-fa3f63826f7c h1:BTAbnbegUI github.com/hashicorp/go-msgpack v0.0.0-20150518234257-fa3f63826f7c/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= github.com/hashicorp/go-multierror v1.0.0 h1:iVjPR7a6H0tWELX5NxNe7bYopibicUzc7uPribsnS6o= github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= -github.com/hashicorp/go-rootcerts v0.0.0-20160503143440-6bb64b370b90 h1:VBj0QYQ0u2MCJzBfeYXGexnAl17GsH1yidnoxCqqD9E= github.com/hashicorp/go-rootcerts v0.0.0-20160503143440-6bb64b370b90/go.mod h1:o4zcYY1e0GEZI6eSEr+43QDYmuGglw1qSO6qdHUHCgg= github.com/hashicorp/go-sockaddr v0.0.0-20180320115054-6d291a969b86 h1:7YOlAIO2YWnJZkQp7B5eFykaIY7C9JndqAFQyVV5BhM= github.com/hashicorp/go-sockaddr v0.0.0-20180320115054-6d291a969b86/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= github.com/hashicorp/go-uuid v1.0.0 h1:RS8zrF7PhGwyNPOtxSClXXj9HA8feRnJzgnI1RJCSnM= github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-version v1.1.0 h1:bPIoEKD27tNdebFGGxxYwcL4nepeY4j1QP23PFRGzg0= +github.com/hashicorp/go-version v1.1.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru v0.5.1 h1:0hERBMJE1eitiLkihrMvRVBYAkpHzc/J3QdDN+dAcgU= +github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/memberlist v0.1.0 h1:qSsCiC0WYD39lbSitKNt40e30uorm2Ss/d4JGU1hzH8= github.com/hashicorp/memberlist v0.1.0/go.mod h1:ncdBp14cuox2iFOq3kDiquKU6fqsTBc3W6JvZwjxxsE= github.com/hashicorp/serf v0.0.0-20161007004122-1d4fa605f6ff h1:3QdMaUEV3zE0VcBgBj+P3GQ26ZKmhb5gLea1hqAZ50U= github.com/hashicorp/serf v0.0.0-20161007004122-1d4fa605f6ff/go.mod h1:h/Ru6tmZazX7WO/GDmwdpS975F019L4t5ng5IgwbNrE= github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d h1:kJCB4vdITiW1eC1vq2e6IsrXKrZit1bv/TDYFGMp4BQ= github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM= -github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/influxdata/influxdb v0.0.0-20170331210902-15e594fc09f1/go.mod h1:qZna6X/4elxqT3yI9iZYdZrWWdeFOOprn86kgg4+IzY= github.com/jackc/fake v0.0.0-20150926172116-812a484cc733/go.mod h1:WrMFNQdiFJ80sQsxDoMokWK1W5TQtxBFNpzWTD84ibQ= github.com/jackc/pgx v3.2.0+incompatible/go.mod h1:0ZGrqGqkRlliWnWB4zKnWtjbSWbGkVEFm4TeybAXq+I= -github.com/jmespath/go-jmespath v0.0.0-20160803190731-bd40a432e4c7 h1:SMvOWPJCES2GdFracYbBQh93GXac8fq7HeN6JnpduB8= github.com/jmespath/go-jmespath v0.0.0-20160803190731-bd40a432e4c7/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= -github.com/json-iterator/go v0.0.0-20180612202835-f2b4162afba3 h1:/UewZcckqhvnnS0C6r3Sher2hSEbVmM6Ogpcjen08+Y= github.com/json-iterator/go v0.0.0-20180612202835-f2b4162afba3/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/jtolds/gls v4.2.1+incompatible h1:fSuqC+Gmlu6l/ZYAoZzx2pyucC8Xza35fpRVWLVmUEE= github.com/jtolds/gls v4.2.1+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= @@ -173,20 +163,16 @@ github.com/mattn/go-runewidth v0.0.3 h1:a+kO+98RDGEfo6asOGMmpodZq4FNtnGP54yps8Bz github.com/mattn/go-runewidth v0.0.3/go.mod 
h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= -github.com/miekg/dns v1.0.4/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= -github.com/miekg/dns v1.0.8 h1:Zi8HNpze3NeRWH1PQV6O71YcvJRQ6j0lORO6DAEmAAI= -github.com/miekg/dns v1.0.8/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/miekg/dns v1.1.4 h1:rCMZsU2ScVSYcAsOXgmC6+AKOK+6pmQTOcw03nfwYV0= +github.com/miekg/dns v1.1.4/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= github.com/minio/minio-go v0.0.0-20190131015406-c8a261de75c1 h1:jw16EimP5oAEM/2wt+SiEUov/YDyTCTDuPtIKgQIvk0= github.com/minio/minio-go v0.0.0-20190131015406-c8a261de75c1/go.mod h1:vuvdOZLJuf5HmJAJrKV64MmozrSsk+or0PB5dzdfspg= github.com/mitchellh/go-homedir v0.0.0-20180523094522-3864e76763d9/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/go-testing-interface v1.0.0 h1:fzU/JVNcaqHQEcVFAKeR41fkiLdIPrefOvVG1VZ96U0= github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/montanaflynn/stats v0.0.0-20180911141734-db72e6cae808/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= github.com/mozillazg/go-cos v0.11.0 h1:PUaIGDL6Jxgrzqg0XvZ6y32jS/5z26+UxnX6ENMm7yE= @@ -203,9 +189,7 @@ github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn github.com/olekukonko/tablewriter v0.0.0-20180912035003-be2c049b30cc/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/olekukonko/tablewriter v0.0.1 h1:b3iUnf1v+ppJiOfNX4yxxqfWKMQPZR5yoh8urCTFX88= github.com/olekukonko/tablewriter v0.0.1/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= -github.com/onsi/ginkgo v1.6.0 h1:Ix8l273rp3QzYgXSR+c8d1fTG7UPgYkOSELPhiY/YGw= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/gomega v1.4.1 h1:PZSj/UFNaVp3KxrzHOcS7oyuWA7LoOY/77yCTEFu21U= github.com/onsi/gomega v1.4.1/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= github.com/opentracing-contrib/go-stdlib v0.0.0-20170113013457-1de4cc2120e7 h1:8KbikWulLUcMM96hBxjgoo6gTmCkG6HYSDohv/WygYU= github.com/opentracing-contrib/go-stdlib v0.0.0-20170113013457-1de4cc2120e7/go.mod h1:PLldrQSroqzH70Xl+1DQcGnefIbqsKR7UDaiux3zV+w= @@ -218,7 +202,6 @@ github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTm github.com/openzipkin/zipkin-go v0.1.3/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c h1:Lgl0gzECD8GnQ5QCWA8o6BtfL6mDH5rQgM4/fX3avOs= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod 
h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= -github.com/peterbourgon/diskv v0.0.0-20180312054125-0646ccaebea1 h1:k/dnb0bixQwWsDLxwr6/w7rtZCVDKdbQnGQkeZGYsws= github.com/peterbourgon/diskv v0.0.0-20180312054125-0646ccaebea1/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/peterbourgon/g2s v0.0.0-20170223122336-d4e7ad98afea/go.mod h1:1VcHEd3ro4QMoHfiNl/j7Jkln9+KQuorp0PItHMJYNg= github.com/petermattis/goid v0.0.0-20170504144140-0ded85884ba5/go.mod h1:jvVRKCrJTQWu0XVbaOlby/2lO20uSCHEMzzplHXte1o= @@ -243,14 +226,13 @@ github.com/prometheus/procfs v0.0.0-20180725123919-05ee40e3a273/go.mod h1:c3At6R github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.0-20181204211112-1dc9a6cbc91a h1:9a8MnZMP0X2nLJdBg+pBmGgkJlSaKC2KaQmTCk1XDtE= github.com/prometheus/procfs v0.0.0-20181204211112-1dc9a6cbc91a/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/prometheus v0.0.0-20190118110214-3bd41cc92c78 h1:XGymJlAz2gJI0W/v9hZP7tBYTDdaK1Yq2lMNcXGb2Lo= -github.com/prometheus/prometheus v0.0.0-20190118110214-3bd41cc92c78/go.mod h1:aNuqjQW47j/yc/kN0cNZ8cOTBlBU8VbspEvNDfxe1To= -github.com/prometheus/tsdb v0.4.0 h1:pXJyEi/5p6UBmOrnzsZmYxLrZjxnRlEB78/qj3+a8Gk= -github.com/prometheus/tsdb v0.4.0/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= +github.com/prometheus/prometheus v0.0.0-20190328180107-4d60eb36dcbe h1:Wpwxfdtx0+/3dgqBvUAB2CTRtHRimQrH4RTEJ5yxCUo= +github.com/prometheus/prometheus v0.0.0-20190328180107-4d60eb36dcbe/go.mod h1:mVHnNN4pf1p9fM7dRvZQAKj0Ex93s+mIhtsml8SJy5E= +github.com/prometheus/tsdb v0.6.1 h1:wUp5iY/rmwv6VNzZg4yaUVpkSs4nX5O5+aoviWTNeUo= +github.com/prometheus/tsdb v0.6.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/rlmcpherson/s3gof3r v0.5.0/go.mod h1:s7vv7SMDPInkitQMuZzH615G7yWHdrU2r/Go7Bo71Rs= github.com/rubyist/circuitbreaker v2.2.1+incompatible/go.mod h1:Ycs3JgJADPuzJDwffe12k6BZT8hxVi6lFK+gWYJLN4A= -github.com/samuel/go-zookeeper v0.0.0-20161028232340-1d7be4effb13 h1:4AQBn5RJY4WH8t8TLEMZUsWeXHAUcoao42TCAfpEJJE= github.com/samuel/go-zookeeper v0.0.0-20161028232340-1d7be4effb13/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= github.com/sasha-s/go-deadlock v0.0.0-20161201235124-341000892f3d/go.mod h1:StQn567HiB1fF2yJ44N9au7wOhrPS3iZqiDbRupzT10= github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= @@ -270,10 +252,11 @@ github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasO github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= go.opencensus.io v0.18.0/go.mod h1:vKdFvxhtzZ9onBp9VKHK8z/sRpBMnKAsufL7wlDrCOA= +go.opencensus.io v0.18.1-0.20181204023538-aab39bd6a98b/go.mod h1:vKdFvxhtzZ9onBp9VKHK8z/sRpBMnKAsufL7wlDrCOA= go.opencensus.io v0.19.0 h1:+jrnNy8MR4GZXvwF9PEuSyHxA4NaTf6601oNRwCSXq0= go.opencensus.io 
v0.19.0/go.mod h1:AYeH0+ZxYyghG8diqaaIq/9P3VgCCt5GF2ldCY4dkFg= golang.org/x/crypto v0.0.0-20180621125126-a49355c7e3f8/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= @@ -288,7 +271,6 @@ golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20181106065722-10aee1819953/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181217023233-e147a9138326/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190125091013-d26f9f9a57f3 h1:ulvT7fqt0yHWzpJwI57MezWnYDVpCAYBVuYst/L+fAY= golang.org/x/net v0.0.0-20190125091013-d26f9f9a57f3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd h1:HuTn7WObtcDo9uEEU7rEqL0jYthdXAmZ6PP+meazmaU= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -296,7 +278,6 @@ golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAG golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890 h1:uESlIz09WIHT2I+pasSXcpLYqYK8wHcdCetU3VuMBJE= golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f h1:Bl/8QSvNqXvPGPGXa2z5xUTmV7VDcZyvRZ+QQXkXTZQ= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4 h1:YUO/7uOKsKeq9UokNS62b8FYywz3ker1l1vDZRCRefw= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -304,25 +285,21 @@ golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181218192612-074acd46bca6/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190124100055-b90733256f2e h1:3GIlrlVLfkoipSReOMNAgApI0ajnalyLa/EZHHca/XI= golang.org/x/sys v0.0.0-20190124100055-b90733256f2e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223 h1:DH4skfRX4EBpamg7iV4ZlCpblAHI6s6TDM39bFZumv8= -golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190322080309-f49334f85ddc h1:4gbWbmmPFp4ySWICouJl6emP0MyS31yy9SrTlAGFT+g= +golang.org/x/sys v0.0.0-20190322080309-f49334f85ddc/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2 h1:z99zHgr7hKfrUcX/KsoJk5FJfjTceCKIp96+biqP4To= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/time v0.0.0-20170424234030-8be79e1e0910 h1:bCMaBn7ph495H+x72gEvgcv+mDRd9dElbzo/mVCMxX4= golang.org/x/time v0.0.0-20170424234030-8be79e1e0910/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools 
v0.0.0-20181023010539-40a48ad93fbe/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20181219222714-6e267b5cc78e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -google.golang.org/api v0.0.0-20180506000402-20530fd5d65a/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/api v0.0.0-20180910000450-7ca32eb868bf/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/api v0.0.0-20181220000619-583d854617af/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/api v0.1.0 h1:K6z2u68e86TPdSdefXdzvXgR1zEMa+459vBSfWYAZkI= google.golang.org/api v0.1.0/go.mod h1:UGEZY7KEX120AnNLIHFMKIo4obdJhkp2tPbaPlQx13Y= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.3.0 h1:FBSsiFRMz3LBeXIomRnVzrQwSDj4ibvcRexLG0LZGQk= google.golang.org/appengine v1.3.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -332,6 +309,7 @@ google.golang.org/genproto v0.0.0-20181202183823-bd91e49a0898/go.mod h1:7Ep/1NZk google.golang.org/genproto v0.0.0-20181219182458-5a97ab628bfb h1:dQshZyyJ5W/Xk8myF4GKBak1pZW6EywJuQ8+44EQhGA= google.golang.org/genproto v0.0.0-20181219182458-5a97ab628bfb/go.mod h1:7Ep/1NZk928CDR8SjdVbjWNpdIf6nzjE3BTgJDr2Atg= google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= +google.golang.org/grpc v1.15.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio= google.golang.org/grpc v1.16.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio= google.golang.org/grpc v1.17.0 h1:TRJYBgMclJvGYn2rIMjj+h9KtMt5r1Ij7ODVRIZkwhk= google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= @@ -341,38 +319,28 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/fsnotify/fsnotify.v1 v1.3.0/go.mod h1:Fyux9zXlo4rWoMSIzpn9fDAYjalPqJ/K1qJ27s+7ltE= gopkg.in/fsnotify/fsnotify.v1 v1.4.7 h1:XNNYLJHt73EyYiCZi6+xjupS9CpvmiDgjPTAjrBlQbo= gopkg.in/fsnotify/fsnotify.v1 v1.4.7/go.mod h1:Fyux9zXlo4rWoMSIzpn9fDAYjalPqJ/K1qJ27s+7ltE= -gopkg.in/inf.v0 v0.9.0 h1:3zYtXIO92bvsdS3ggAdA8Gb4Azj0YU+TVY1uGYNFA8o= gopkg.in/inf.v0 v0.9.0/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.41.0 h1:Ka3ViY6gNYSKiVy71zXBEqKplnV35ImDLVG+8uoIklE= gopkg.in/ini.v1 v1.41.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= -gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/vmihailenco/msgpack.v2 v2.9.1 h1:kb0VV7NuIojvRfzwslQeP3yArBqJHW9tOl4t38VS1jM= gopkg.in/vmihailenco/msgpack.v2 v2.9.1/go.mod h1:/3Dn1Npt9+MYyLpYYXjInO/5jvMLamn+AEGwNEOatn8= -gopkg.in/yaml.v2 v2.2.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.1/go.mod 
h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20180920025451-e3ad64cb4ed3/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -k8s.io/api v0.0.0-20181213150558-05914d821849 h1:WZFcFPXmLR7g5CxQNmjWv0mg8qulJLxDghbzS4pQtzY= k8s.io/api v0.0.0-20181213150558-05914d821849/go.mod h1:iuAfoD4hCxJ8Onx9kaTIt30j7jUFS00AXQi6QMi99vA= -k8s.io/apimachinery v0.0.0-20181127025237-2b1284ed4c93 h1:tT6oQBi0qwLbbZSfDkdIsb23EwaLY85hoAV4SpXfdao= k8s.io/apimachinery v0.0.0-20181127025237-2b1284ed4c93/go.mod h1:ccL7Eh7zubPUSh9A3USN90/OzHNSVN6zxzde07TDCL0= -k8s.io/client-go v2.0.0-alpha.0.0.20181121191925-a47917edff34+incompatible h1:7JnS1I1KbtbearjSCrycUhHSob+KjG6HDWY1GhjkAIU= k8s.io/client-go v2.0.0-alpha.0.0.20181121191925-a47917edff34+incompatible/go.mod h1:7vJpHMYJwNQCWgzmNV+VYUl1zCObLyodBc8nIyt8L5s= -k8s.io/klog v0.1.0 h1:I5HMfc/DtuVaGR1KPwUrTc476K8NCqNBldC7H4dYEzk= k8s.io/klog v0.1.0/go.mod h1:Gq+BEi5rUBO/HRz0bTSXDUcqjScdoY3a9IHpCEIOOfk= -k8s.io/kube-openapi v0.0.0-20180629012420-d83b052f768a h1:tHgpQvrWaYfrnC8G4N0Oszw5HHCsZxKilDi2R7HuCSM= k8s.io/kube-openapi v0.0.0-20180629012420-d83b052f768a/go.mod h1:BXM9ceUBTj2QnfH2MK1odQs778ajze1RxcmP6S8RVVc= labix.org/v2/mgo v0.0.0-20140701140051-000000000287 h1:L0cnkNl4TfAXzvdrqsYEmxOHOCv2p5I3taaReO8BWFs= labix.org/v2/mgo v0.0.0-20140701140051-000000000287/go.mod h1:Lg7AYkt1uXJoR9oeSZ3W/8IXLdvOfIITgZnommstyz4= launchpad.net/gocheck v0.0.0-20140225173054-000000000087 h1:Izowp2XBH6Ya6rv+hqbceQyw/gSGoXfH/UPoTGduL54= launchpad.net/gocheck v0.0.0-20140225173054-000000000087/go.mod h1:hj7XX3B/0A+80Vse0e+BUHsHMTEhd0O4cpUHr/e/BUM= -sigs.k8s.io/yaml v1.1.0 h1:4A07+ZFc2wgJwo8YNlQpr1rVlgUDlxXHhPJciaPY5gs= sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= diff --git a/netlify.toml b/netlify.toml new file mode 100644 index 0000000000..f5c7576308 --- /dev/null +++ b/netlify.toml @@ -0,0 +1,19 @@ +[Settings] + +# All netlify options should be defined here. UI options should stay empty. + +[build] +base = "" +publish = "website/public" + +# Our Makefile builds hugo, but it's faster if netlify grabs the correct version on its own +# via a simple curl. +environment = { HUGO_VERSION="0.55.3" } + +# NOTE: The sleep at the end is to make sure logs are not truncated on error. +command = "(env && make web HUGO=$(which hugo)) || (sleep 30; false)" + +[context.deploy-preview] + +# NOTE: The sleep at the end is to make sure logs are not truncated on error. +command = "(env && make web HUGO=$(which hugo) WEBSITE_BASE_URL=${DEPLOY_PRIME_URL}) || (sleep 30; false)" \ No newline at end of file diff --git a/pkg/block/block.go b/pkg/block/block.go index 008e1798d4..5c1033a7d8 100644 --- a/pkg/block/block.go +++ b/pkg/block/block.go @@ -25,6 +25,8 @@ jobs: const ( MetaFilename = "meta.json" // IndexFilename is the known index file for block index. IndexFilename = "index" + // IndexCacheFilename is the canonical name for the index cache file that stores essential index information. + IndexCacheFilename = "index.cache.json" // ChunksDirname is the known dir name for chunks with compressed samples. 
ChunksDirname = "chunks" @@ -93,6 +95,12 @@ func Upload(ctx context.Context, logger log.Logger, bkt objstore.Bucket, bdir st return cleanUp(bkt, id, errors.Wrap(err, "upload index")) } + if meta.Thanos.Source == metadata.CompactorSource { + if err := objstore.UploadFile(ctx, logger, bkt, path.Join(bdir, IndexCacheFilename), path.Join(id.String(), IndexCacheFilename)); err != nil { + return cleanUp(bkt, id, errors.Wrap(err, "upload index cache")) + } + } + // Meta.json always needs to be uploaded as the last item. This allows us to assume that block directories without a meta file // are pending uploads. if err := objstore.UploadFile(ctx, logger, bkt, path.Join(bdir, MetaFilename), path.Join(id.String(), MetaFilename)); err != nil { diff --git a/pkg/block/index.go b/pkg/block/index.go index 777654f2e6..f9f36d55aa 100644 --- a/pkg/block/index.go +++ b/pkg/block/index.go @@ -26,8 +26,10 @@ import ( "github.com/prometheus/tsdb/labels" ) -// IndexCacheFilename is the canonical name for index cache files. -const IndexCacheFilename = "index.cache.json" +const ( + // IndexCacheVersion is an enumeration of index cache versions supported by Thanos. + IndexCacheVersion1 = iota + 1 +) type postingsRange struct { Name, Value string } type indexCache struct { - Version int - Symbols map[uint32]string - LabelValues map[string][]string - Postings []postingsRange + Version int + CacheVersion int + Symbols map[uint32]string + LabelValues map[string][]string + Postings []postingsRange } type realByteSlice []byte @@ -112,9 +115,10 @@ func WriteIndexCache(logger log.Logger, indexFn string, fn string) error { defer runutil.CloseWithLogOnErr(logger, f, "index cache writer") v := indexCache{ - Version: indexr.Version(), - Symbols: symbols, - LabelValues: map[string][]string{}, + Version: indexr.Version(), + CacheVersion: IndexCacheVersion1, + Symbols: symbols, + LabelValues: map[string][]string{}, } // Extract label value indices. 
@@ -336,7 +340,7 @@ func GatherIndexIssueStats(logger log.Logger, fn string, minTime int64, maxTime if err != nil { return stats, errors.Wrap(err, "open index file") } - defer runutil.CloseWithErrCapture(logger, &err, r, "gather index issue file reader") + defer runutil.CloseWithErrCapture(&err, r, "gather index issue file reader") p, err := r.Postings(index.AllPostingsKey()) if err != nil { @@ -456,19 +460,19 @@ func Repair(logger log.Logger, dir string, id ulid.ULID, source metadata.SourceT if err != nil { return resid, errors.Wrap(err, "open block") } - defer runutil.CloseWithErrCapture(logger, &err, b, "repair block reader") + defer runutil.CloseWithErrCapture(&err, b, "repair block reader") indexr, err := b.Index() if err != nil { return resid, errors.Wrap(err, "open index") } - defer runutil.CloseWithErrCapture(logger, &err, indexr, "repair index reader") + defer runutil.CloseWithErrCapture(&err, indexr, "repair index reader") chunkr, err := b.Chunks() if err != nil { return resid, errors.Wrap(err, "open chunks") } - defer runutil.CloseWithErrCapture(logger, &err, chunkr, "repair chunk reader") + defer runutil.CloseWithErrCapture(&err, chunkr, "repair chunk reader") resdir := filepath.Join(dir, resid.String()) @@ -476,13 +480,13 @@ func Repair(logger log.Logger, dir string, id ulid.ULID, source metadata.SourceT if err != nil { return resid, errors.Wrap(err, "open chunk writer") } - defer runutil.CloseWithErrCapture(logger, &err, chunkw, "repair chunk writer") + defer runutil.CloseWithErrCapture(&err, chunkw, "repair chunk writer") indexw, err := index.NewWriter(filepath.Join(resdir, IndexFilename)) if err != nil { return resid, errors.Wrap(err, "open index writer") } - defer runutil.CloseWithErrCapture(logger, &err, indexw, "repair index writer") + defer runutil.CloseWithErrCapture(&err, indexw, "repair index writer") // TODO(fabxc): adapt so we properly handle the version once we update to an upstream // that has multiple. @@ -531,7 +535,7 @@ func IgnoreDuplicateOutsideChunk(_ int64, _ int64, last *chunks.Meta, curr *chun // the current one. if curr.MinTime != last.MinTime || curr.MaxTime != last.MaxTime { return false, errors.Errorf("non-sequential chunks not equal: [%d, %d] and [%d, %d]", - last.MaxTime, last.MaxTime, curr.MinTime, curr.MaxTime) + last.MinTime, last.MaxTime, curr.MinTime, curr.MaxTime) } ca := crc32.Checksum(last.Chunk.Bytes(), castagnoli) cb := crc32.Checksum(curr.Chunk.Bytes(), castagnoli) @@ -559,9 +563,14 @@ func sanitizeChunkSequence(chks []chunks.Meta, mint int64, maxt int64, ignoreChk var last *chunks.Meta OUTER: - for _, c := range chks { + // This compares the current chunk to the chunk from the last iteration + // by pointers. If we use "i, c := range chks" the variable c is a single + // variable whose address doesn't change through the entire loop. + // The current element of the chks slice is copied into it. We must take + // the address of the indexed slice element instead. + for i := range chks { for _, ignoreChkFn := range ignoreChkFns { - ignore, err := ignoreChkFn(mint, maxt, last, &c) + ignore, err := ignoreChkFn(mint, maxt, last, &chks[i]) if err != nil { return nil, errors.Wrap(err, "ignore function") } @@ -571,13 +580,18 @@ OUTER: } } - last = &c - repl = append(repl, c) + last = &chks[i] + repl = append(repl, chks[i]) } return repl, nil } +type seriesRepair struct { + lset labels.Labels + chks []chunks.Meta +} + // rewrite writes all data from the readers back into the writers while cleaning // up mis-ordered and duplicated chunks. 
func rewrite( @@ -605,17 +619,20 @@ postings = index.NewMemPostings() values = map[string]stringset{} i = uint64(0) + series = []seriesRepair{} ) - var lset labels.Labels - var chks []chunks.Meta - for all.Next() { + var lset labels.Labels + var chks []chunks.Meta id := all.At() if err := indexr.Series(id, &lset, &chks); err != nil { return err } + // Make sure labels are in sorted order. + sort.Sort(lset) + for i, c := range chks { chks[i].Chunk, err = chunkr.Chunk(c.Ref) if err != nil { @@ -632,21 +649,39 @@ continue } - if err := chunkw.WriteChunks(chks...); err != nil { + series = append(series, seriesRepair{ + lset: lset, + chks: chks, + }) + } + + if all.Err() != nil { + return errors.Wrap(all.Err(), "iterate series") + } + + // Sort the series; if labels were re-ordered then the ordering of series + // will be different. + sort.Slice(series, func(i, j int) bool { + return labels.Compare(series[i].lset, series[j].lset) < 0 + }) + + // Build a new TSDB block. + for _, s := range series { + if err := chunkw.WriteChunks(s.chks...); err != nil { return errors.Wrap(err, "write chunks") } - if err := indexw.AddSeries(i, lset, chks...); err != nil { + if err := indexw.AddSeries(i, s.lset, s.chks...); err != nil { return errors.Wrap(err, "add series") } - meta.Stats.NumChunks += uint64(len(chks)) + meta.Stats.NumChunks += uint64(len(s.chks)) meta.Stats.NumSeries++ - for _, chk := range chks { + for _, chk := range s.chks { meta.Stats.NumSamples += uint64(chk.Chunk.NumSamples()) } - for _, l := range lset { + for _, l := range s.lset { valset, ok := values[l.Name] if !ok { valset = stringset{} @@ -654,12 +689,9 @@ } valset.set(l.Value) } - postings.Add(i, lset) + postings.Add(i, s.lset) i++ } - if all.Err() != nil { - return errors.Wrap(all.Err(), "iterate series") - } s := make([]string, 0, 256) for n, v := range values { diff --git a/pkg/block/index_test.go b/pkg/block/index_test.go index 80c10e8e6e..36a21f3ea3 100644 --- a/pkg/block/index_test.go +++ b/pkg/block/index_test.go @@ -1,6 +1,7 @@ package block import ( + "context" "io/ioutil" "os" "path/filepath" @@ -12,11 +13,13 @@ import ( ) func TestWriteReadIndexCache(t *testing.T) { + ctx := context.Background() + tmpDir, err := ioutil.TempDir("", "test-compact-prepare") testutil.Ok(t, err) defer func() { testutil.Ok(t, os.RemoveAll(tmpDir)) }() - b, err := testutil.CreateBlock(tmpDir, []labels.Labels{ + b, err := testutil.CreateBlock(ctx, tmpDir, []labels.Labels{ {{Name: "a", Value: "1"}}, {{Name: "a", Value: "2"}}, {{Name: "a", Value: "3"}}, diff --git a/pkg/block/metadata/meta.go b/pkg/block/metadata/meta.go index 0d8b22dd19..9003e92904 100644 --- a/pkg/block/metadata/meta.go +++ b/pkg/block/metadata/meta.go @@ -23,6 +23,7 @@ type SourceType string const ( UnknownSource SourceType = "" SidecarSource SourceType = "sidecar" + ReceiveSource SourceType = "receive" CompactorSource SourceType = "compactor" CompactorRepairSource SourceType = "compactor.repair" RulerSource SourceType = "ruler" @@ -36,7 +37,7 @@ const ( ) const ( - // MetaVersion is a enumeration of versions supported by Thanos. + // MetaVersion is an enumeration of meta versions supported by Thanos. 
MetaVersion1 = iota + 1 ) diff --git a/pkg/compact/compact.go b/pkg/compact/compact.go index 6fd89b76e9..c9c21cdeb0 100644 --- a/pkg/compact/compact.go +++ b/pkg/compact/compact.go @@ -3,16 +3,17 @@ package compact import ( "context" "fmt" + "io/ioutil" "os" + "path" "path/filepath" "sort" + "strings" "sync" "time" "github.com/improbable-eng/thanos/pkg/block/metadata" - "io/ioutil" - "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/improbable-eng/thanos/pkg/block" @@ -31,6 +32,8 @@ const ( ResolutionLevelRaw = ResolutionLevel(downsample.ResLevel0) ResolutionLevel5m = ResolutionLevel(downsample.ResLevel1) ResolutionLevel1h = ResolutionLevel(downsample.ResLevel2) + + MinimumAgeForRemoval = time.Duration(30 * time.Minute) ) var blockTooFreshSentinelError = errors.New("Block too fresh") @@ -41,7 +44,7 @@ type Syncer struct { logger log.Logger reg prometheus.Registerer bkt objstore.Bucket - syncDelay time.Duration + consistencyDelay time.Duration mtx sync.Mutex blocks map[ulid.ULID]*metadata.Meta blocksMtx sync.Mutex @@ -60,6 +63,9 @@ type syncerMetrics struct { garbageCollectionDuration prometheus.Histogram compactions *prometheus.CounterVec compactionFailures *prometheus.CounterVec + indexCacheBlocks prometheus.Counter + indexCacheTraverse prometheus.Counter + indexCacheFailures prometheus.Counter } func newSyncerMetrics(reg prometheus.Registerer) *syncerMetrics { @@ -129,14 +135,14 @@ func newSyncerMetrics(reg prometheus.Registerer) *syncerMetrics { // NewSyncer returns a new Syncer for the given Bucket and directory. // Blocks must be at least as old as the sync delay for being considered. -func NewSyncer(logger log.Logger, reg prometheus.Registerer, bkt objstore.Bucket, syncDelay time.Duration, blockSyncConcurrency int, acceptMalformedIndex bool) (*Syncer, error) { +func NewSyncer(logger log.Logger, reg prometheus.Registerer, bkt objstore.Bucket, consistencyDelay time.Duration, blockSyncConcurrency int, acceptMalformedIndex bool) (*Syncer, error) { if logger == nil { logger = log.NewNopLogger() } return &Syncer{ logger: logger, reg: reg, - syncDelay: syncDelay, + consistencyDelay: consistencyDelay, blocks: map[ulid.ULID]*metadata.Meta{}, bkt: bkt, metrics: newSyncerMetrics(reg), @@ -146,7 +152,8 @@ func NewSyncer(logger log.Logger, reg prometheus.Registerer, bkt objstore.Bucket } // SyncMetas synchronizes all meta files from blocks in the bucket into -// the memory. +// memory. It removes any partial blocks older than the max of +// consistencyDelay and MinimumAgeForRemoval from the bucket. func (c *Syncer) SyncMetas(ctx context.Context) error { c.mtx.Lock() defer c.mtx.Unlock() @@ -191,6 +198,9 @@ func (c *Syncer) syncMetas(ctx context.Context) error { continue } if err != nil { + if removedOrIgnored := c.removeIfMetaMalformed(workCtx, id); removedOrIgnored { + continue + } errChan <- err return } @@ -247,6 +257,10 @@ func (c *Syncer) downloadMeta(ctx context.Context, id ulid.ULID) (*metadata.Meta meta, err := block.DownloadMeta(ctx, c.logger, c.bkt, id) if err != nil { + if ulid.Now()-id.Time() < uint64(c.consistencyDelay/time.Millisecond) { + level.Debug(c.logger).Log("msg", "block is too fresh for now", "block", id) + return nil, blockTooFreshSentinelError + } return nil, errors.Wrapf(err, "downloading meta.json for %s", id) } @@ -256,7 +270,7 @@ func (c *Syncer) downloadMeta(ctx context.Context, id ulid.ULID) (*metadata.Meta // - compactor created blocks // NOTE: It is not safe to miss an "old" block (even if it was newly created) in the sync step.
The Compactor needs to be aware of ALL old blocks. // TODO(bplotka): https://github.com/improbable-eng/thanos/issues/377 - if ulid.Now()-id.Time() < uint64(c.syncDelay/time.Millisecond) && + if ulid.Now()-id.Time() < uint64(c.consistencyDelay/time.Millisecond) && meta.Thanos.Source != metadata.BucketRepairSource && meta.Thanos.Source != metadata.CompactorSource && meta.Thanos.Source != metadata.CompactorRepairSource { @@ -268,6 +282,33 @@ func (c *Syncer) downloadMeta(ctx context.Context, id ulid.ULID) (*metadata.Meta return &meta, nil } +// removeIfMetaMalformed removes a block from the bucket if that block does not have a meta file. It ignores blocks that +// are younger than MinimumAgeForRemoval. +func (c *Syncer) removeIfMetaMalformed(ctx context.Context, id ulid.ULID) (removedOrIgnored bool) { + metaExists, err := c.bkt.Exists(ctx, path.Join(id.String(), block.MetaFilename)) + if err != nil { + level.Warn(c.logger).Log("msg", "failed to check meta exists for block", "block", id, "err", err) + return false + } + if metaExists { + // Meta exists, block is not malformed. + return false + } + + if ulid.Now()-id.Time() <= uint64(MinimumAgeForRemoval/time.Millisecond) { + // Minimum delay has not expired, ignore for now. + return true + } + + if err := block.Delete(ctx, c.bkt, id); err != nil { + level.Warn(c.logger).Log("msg", "failed to delete malformed block", "block", id, "err", err) + return false + } + level.Info(c.logger).Log("msg", "deleted malformed block", "block", id) + + return true +} + // GroupKey returns a unique identifier for the group the block belongs to. It considers // the downsampling resolution and the block's labels. func GroupKey(meta metadata.Meta) string { @@ -534,7 +575,6 @@ func (cg *Group) Compact(ctx context.Context, dir string, comp tsdb.Compactor) ( cg.compactionFailures.Inc() } cg.compactions.Inc() - return shouldRerun, compID, err } @@ -812,6 +852,8 @@ func (cg *Group) compact(ctx context.Context, dir string, comp tsdb.Compactor) ( "blocks", fmt.Sprintf("%v", plan), "duration", time.Since(begin)) bdir := filepath.Join(dir, compID.String()) + index := filepath.Join(bdir, block.IndexFilename) + indexCache := filepath.Join(bdir, block.IndexCacheFilename) newMeta, err := metadata.InjectThanos(cg.logger, bdir, metadata.Thanos{ Labels: cg.labels.Map(), @@ -827,7 +869,7 @@ func (cg *Group) compact(ctx context.Context, dir string, comp tsdb.Compactor) ( } // Ensure the output block is valid. - if err := block.VerifyIndex(cg.logger, filepath.Join(bdir, block.IndexFilename), newMeta.MinTime, newMeta.MaxTime); !cg.acceptMalformedIndex && err != nil { + if err := block.VerifyIndex(cg.logger, index, newMeta.MinTime, newMeta.MaxTime); !cg.acceptMalformedIndex && err != nil { return false, ulid.ULID{}, halt(errors.Wrapf(err, "invalid result block %s", bdir)) } @@ -836,6 +878,10 @@ func (cg *Group) compact(ctx context.Context, dir string, comp tsdb.Compactor) ( return false, ulid.ULID{}, halt(errors.Wrapf(err, "resulted compacted block %s overlaps with something", bdir)) } + if err := block.WriteIndexCache(cg.logger, index, indexCache); err != nil { + return false, ulid.ULID{}, errors.Wrap(err, "write index cache") + } + begin = time.Now() if err := block.Upload(ctx, cg.logger, cg.bkt, bdir); err != nil { @@ -878,28 +924,80 @@ func (cg *Group) deleteBlock(b string) error { // BucketCompactor compacts blocks in a bucket.
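The age checks in downloadMeta and removeIfMetaMalformed both rely on a ULID embedding its creation time as Unix milliseconds. A minimal sketch of that arithmetic (illustrative only; ageMs and oldEnoughForRemoval are hypothetical helpers, while the ulid calls and MinimumAgeForRemoval match those used above):

    // ageMs computes how old a block is, in milliseconds, from its ULID alone.
    func ageMs(id ulid.ULID) uint64 {
    	return ulid.Now() - id.Time() // both values are Unix milliseconds
    }

    // oldEnoughForRemoval reports whether a malformed block may be deleted,
    // i.e. it is older than MinimumAgeForRemoval (30 minutes).
    func oldEnoughForRemoval(id ulid.ULID) bool {
    	return ageMs(id) > uint64(MinimumAgeForRemoval/time.Millisecond)
    }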
type BucketCompactor struct { - logger log.Logger - sy *Syncer - comp tsdb.Compactor - compactDir string - bkt objstore.Bucket + logger log.Logger + sy *Syncer + comp tsdb.Compactor + compactDir string + bkt objstore.Bucket + concurrency int } // NewBucketCompactor creates a new bucket compactor. -func NewBucketCompactor(logger log.Logger, sy *Syncer, comp tsdb.Compactor, compactDir string, bkt objstore.Bucket) *BucketCompactor { - return &BucketCompactor{ - logger: logger, - sy: sy, - comp: comp, - compactDir: compactDir, - bkt: bkt, +func NewBucketCompactor( + logger log.Logger, + sy *Syncer, + comp tsdb.Compactor, + compactDir string, + bkt objstore.Bucket, + concurrency int, +) (*BucketCompactor, error) { + if concurrency <= 0 { + return nil, errors.Errorf("invalid concurrency level (%d), concurrency level must be > 0", concurrency) } + return &BucketCompactor{ + logger: logger, + sy: sy, + comp: comp, + compactDir: compactDir, + bkt: bkt, + concurrency: concurrency, + }, nil } // Compact runs compaction over bucket. func (c *BucketCompactor) Compact(ctx context.Context) error { // Loop over bucket and compact until there's no work left. for { + var ( + wg sync.WaitGroup + workCtx, workCtxCancel = context.WithCancel(ctx) + groupChan = make(chan *Group) + errChan = make(chan error, c.concurrency) + finishedAllGroups = true + mtx sync.Mutex + ) + + // Set up workers that will compact the groups when the groups are ready. + // They will compact available groups until they encounter an error, after which they will stop. + for i := 0; i < c.concurrency; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for g := range groupChan { + shouldRerunGroup, _, err := g.Compact(workCtx, c.compactDir, c.comp) + if err == nil { + if shouldRerunGroup { + mtx.Lock() + finishedAllGroups = false + mtx.Unlock() + } + continue + } + + if IsIssue347Error(err) { + if err := RepairIssue347(workCtx, c.logger, c.bkt, err); err == nil { + mtx.Lock() + finishedAllGroups = false + mtx.Unlock() + continue + } + } + errChan <- errors.Wrap(err, fmt.Sprintf("compaction failed for group %s", g.Key())) + return + } + }() + } + // Clean up the compaction temporary directory at the beginning of every compaction loop. if err := os.RemoveAll(c.compactDir); err != nil { return errors.Wrap(err, "clean up the compaction temporary directory") @@ -923,24 +1021,30 @@ func (c *BucketCompactor) Compact(ctx context.Context) error { if err != nil { return errors.Wrap(err, "build compaction groups") } - finishedAllGroups := true + + // Send all groups found during this pass to the compaction workers. + groupLoop: for _, g := range groups { - shouldRerunGroup, _, err := g.Compact(ctx, c.compactDir, c.comp) - if err == nil { - if shouldRerunGroup { - finishedAllGroups = false - } - continue + select { + case err = <-errChan: + break groupLoop + case groupChan <- g: } + } + close(groupChan) + wg.Wait() - if IsIssue347Error(err) { - if err := RepairIssue347(ctx, c.logger, c.bkt, err); err == nil { - finishedAllGroups = false - continue - } + close(errChan) + workCtxCancel() + if err != nil { + errMsgs := []string{err.Error()} + // Collect any other errors reported by the workers.
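The worker pool introduced above follows a common Go shape: a fixed number of goroutines drain a channel of work, failures go to a buffered error channel, and the producer stops feeding work as soon as an error arrives. A stripped-down sketch of the same pattern (illustrative only; runPool and process are hypothetical names, and the standard sync, strings, and errors imports are assumed):

    func runPool(concurrency int, work []string, process func(string) error) error {
    	var (
    		wg     sync.WaitGroup
    		workCh = make(chan string)
    		errCh  = make(chan error, concurrency) // buffered so workers never block on send
    	)
    	for i := 0; i < concurrency; i++ {
    		wg.Add(1)
    		go func() {
    			defer wg.Done()
    			for w := range workCh {
    				if err := process(w); err != nil {
    					errCh <- err
    					return // stop this worker on the first failure
    				}
    			}
    		}()
    	}

    	var err error
    loop:
    	for _, w := range work {
    		select {
    		case err = <-errCh: // a worker failed; stop producing work
    			break loop
    		case workCh <- w:
    		}
    	}
    	close(workCh)
    	wg.Wait()
    	close(errCh)

    	// Collect every error the workers reported, as the code below does.
    	var errMsgs []string
    	if err != nil {
    		errMsgs = append(errMsgs, err.Error())
    	}
    	for e := range errCh {
    		errMsgs = append(errMsgs, e.Error())
    	}
    	if len(errMsgs) > 0 {
    		return errors.New(strings.Join(errMsgs, "; "))
    	}
    	return nil
    }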
+ for e := range errChan { + errMsgs = append(errMsgs, e.Error()) } - return errors.Wrap(err, "compaction") + return errors.New(strings.Join(errMsgs, "; ")) } + if finishedAllGroups { break } diff --git a/pkg/compact/compact_e2e_test.go b/pkg/compact/compact_e2e_test.go index 738048c5c2..f58862c0ac 100644 --- a/pkg/compact/compact_e2e_test.go +++ b/pkg/compact/compact_e2e_test.go @@ -178,7 +178,7 @@ func TestGroup_Compact_e2e(t *testing.T) { var metas []*metadata.Meta extLset := labels.Labels{{Name: "e1", Value: "1"}} - b1, err := testutil.CreateBlock(prepareDir, []labels.Labels{ + b1, err := testutil.CreateBlock(ctx, prepareDir, []labels.Labels{ {{Name: "a", Value: "1"}}, {{Name: "a", Value: "2"}, {Name: "a", Value: "2"}}, {{Name: "a", Value: "3"}}, @@ -190,7 +190,7 @@ func TestGroup_Compact_e2e(t *testing.T) { testutil.Ok(t, err) metas = append(metas, meta) - b3, err := testutil.CreateBlock(prepareDir, []labels.Labels{ + b3, err := testutil.CreateBlock(ctx, prepareDir, []labels.Labels{ {{Name: "a", Value: "3"}}, {{Name: "a", Value: "4"}}, {{Name: "a", Value: "5"}}, @@ -215,7 +215,7 @@ func TestGroup_Compact_e2e(t *testing.T) { metas = append(metas, meta) // Due to TSDB compaction delay (not compacting fresh block), we need one more block to be pushed to trigger compaction. - freshB, err := testutil.CreateBlock(prepareDir, []labels.Labels{ + freshB, err := testutil.CreateBlock(ctx, prepareDir, []labels.Labels{ {{Name: "a", Value: "2"}}, {{Name: "a", Value: "3"}}, {{Name: "a", Value: "4"}}, @@ -251,7 +251,7 @@ func TestGroup_Compact_e2e(t *testing.T) { ) testutil.Ok(t, err) - comp, err := tsdb.NewLeveledCompactor(nil, log.NewLogfmtLogger(os.Stderr), []int64{1000, 3000}, nil) + comp, err := tsdb.NewLeveledCompactor(ctx, nil, log.NewLogfmtLogger(os.Stderr), []int64{1000, 3000}, nil) testutil.Ok(t, err) shouldRerun, id, err := g.Compact(ctx, dir, comp) diff --git a/pkg/compact/compact_test.go b/pkg/compact/compact_test.go index f175fcce93..a5fbe5d790 100644 --- a/pkg/compact/compact_test.go +++ b/pkg/compact/compact_test.go @@ -1,7 +1,13 @@ package compact import ( + "bytes" + "context" + "github.com/improbable-eng/thanos/pkg/objstore/inmem" + "github.com/oklog/ulid" + "path" "testing" + "time" "github.com/improbable-eng/thanos/pkg/testutil" "github.com/pkg/errors" @@ -37,3 +43,37 @@ func TestRetryError(t *testing.T) { err = errors.Wrap(retry(errors.Wrap(halt(errors.New("test")), "something")), "something2") testutil.Assert(t, IsHaltError(err), "not a halt error. Retry should not hide halt error") } + +func TestSyncer_SyncMetas_HandlesMalformedBlocks(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + bkt := inmem.NewBucket() + sy, err := NewSyncer(nil, nil, bkt, 10*time.Second, 1, false) + testutil.Ok(t, err) + + // Generate 1 block which is older than MinimumAgeForRemoval and which has chunk data but no meta. The Compactor should delete it. + shouldDeleteId, err := ulid.New(uint64(time.Now().Add(-time.Hour).Unix()*1000), nil) + testutil.Ok(t, err) + + var fakeChunk bytes.Buffer + fakeChunk.Write([]byte{0, 1, 2, 3}) + testutil.Ok(t, bkt.Upload(ctx, path.Join(shouldDeleteId.String(), "chunks", "000001"), &fakeChunk)) + + // Generate 1 block which is older than consistencyDelay but younger than MinimumAgeForRemoval, and which has chunk + // data but no meta. Compactor should ignore it.
+ shouldIgnoreId, err := ulid.New(uint64(time.Now().Unix()*1000), nil) + testutil.Ok(t, err) + + testutil.Ok(t, bkt.Upload(ctx, path.Join(shouldIgnoreId.String(), "chunks", "000001"), &fakeChunk)) + + testutil.Ok(t, sy.SyncMetas(ctx)) + + exists, err := bkt.Exists(ctx, path.Join(shouldDeleteId.String(), "chunks", "000001")) + testutil.Ok(t, err) + testutil.Equals(t, false, exists) + + exists, err = bkt.Exists(ctx, path.Join(shouldIgnoreId.String(), "chunks", "000001")) + testutil.Ok(t, err) + testutil.Equals(t, true, exists) +} diff --git a/pkg/compact/downsample/downsample.go b/pkg/compact/downsample/downsample.go index 2263d61359..92d07af266 100644 --- a/pkg/compact/downsample/downsample.go +++ b/pkg/compact/downsample/downsample.go @@ -43,13 +43,13 @@ func Downsample( if err != nil { return id, errors.Wrap(err, "open index reader") } - defer runutil.CloseWithErrCapture(logger, &err, indexr, "downsample index reader") + defer runutil.CloseWithErrCapture(&err, indexr, "downsample index reader") chunkr, err := b.Chunks() if err != nil { return id, errors.Wrap(err, "open chunk reader") } - defer runutil.CloseWithErrCapture(logger, &err, chunkr, "downsample chunk reader") + defer runutil.CloseWithErrCapture(&err, chunkr, "downsample chunk reader") // Generate new block id. uid := ulid.MustNew(ulid.Now(), rand.New(rand.NewSource(time.Now().UnixNano()))) @@ -81,12 +81,13 @@ func Downsample( if err != nil { return id, errors.Wrap(err, "get streamed block writer") } - defer runutil.CloseWithErrCapture(logger, &err, streamedBlockWriter, "close stream block writer") + defer runutil.CloseWithErrCapture(&err, streamedBlockWriter, "close stream block writer") postings, err := indexr.Postings(index.AllPostingsKey()) if err != nil { return id, errors.Wrap(err, "get all postings list") } + var ( aggrChunks []*AggrChunk all []sample diff --git a/pkg/compact/downsample/downsample_test.go b/pkg/compact/downsample/downsample_test.go index d54d31f0e9..a93ed7c2e7 100644 --- a/pkg/compact/downsample/downsample_test.go +++ b/pkg/compact/downsample/downsample_test.go @@ -163,6 +163,9 @@ func testDownsample(t *testing.T, data []*downsampleTestSet, meta *metadata.Meta id, err := Downsample(log.NewNopLogger(), meta, mb, dir, resolution) testutil.Ok(t, err) + _, err = metadata.Read(filepath.Join(dir, id.String())) + testutil.Ok(t, err) + exp := map[uint64]map[AggrType][]sample{} got := map[uint64]map[AggrType][]sample{} @@ -390,6 +393,8 @@ type memBlock struct { chunks []chunkenc.Chunk numberOfChunks uint64 + + minTime, maxTime int64 } type series struct { @@ -398,7 +403,7 @@ type series struct { } func newMemBlock() *memBlock { - return &memBlock{symbols: map[string]struct{}{}} + return &memBlock{symbols: map[string]struct{}{}, minTime: -1, maxTime: -1} } func (b *memBlock) addSeries(s *series) { @@ -412,12 +417,34 @@ func (b *memBlock) addSeries(s *series) { } for i, cm := range s.chunks { + if b.minTime == -1 || cm.MinTime < b.minTime { + b.minTime = cm.MinTime + } + if b.maxTime == -1 || cm.MaxTime > b.maxTime { + b.maxTime = cm.MaxTime + } s.chunks[i].Ref = b.numberOfChunks + b.chunks = append(b.chunks, cm.Chunk) b.numberOfChunks++ } } +func (b *memBlock) MinTime() int64 { + if b.minTime == -1 { + return 0 + } + + return b.minTime +} + +func (b *memBlock) MaxTime() int64 { + if b.maxTime == -1 { + return 0 + } + + return b.maxTime +} + func (b *memBlock) Postings(name, val string) (index.Postings, error) { allName, allVal := index.AllPostingsKey() diff --git a/pkg/compact/downsample/streamed_block_writer.go
b/pkg/compact/downsample/streamed_block_writer.go index 2e2921ac34..896550a378 100644 --- a/pkg/compact/downsample/streamed_block_writer.go +++ b/pkg/compact/downsample/streamed_block_writer.go @@ -167,26 +167,20 @@ func (w *streamedBlockWriter) Close() error { if w.finalized { return nil } - - var merr tsdb.MultiError w.finalized = true - // Finalise data block only if there wasn't any internal errors. - if !w.ignoreFinalize { - merr.Add(w.finalize()) - } + merr := tsdb.MultiError{} - for _, cl := range w.closers { - merr.Add(cl.Close()) + if w.ignoreFinalize { + // Close open file descriptors anyway. + for _, cl := range w.closers { + merr.Add(cl.Close()) + } + return merr.Err() } - return errors.Wrap(merr.Err(), "close closers") -} + // Finalize: save the prepared index and metadata to their corresponding files. -// finalize saves prepared index and meta data to corresponding files. -// It is called on Close. Even if an error happened outside of StreamWriter, it will finalize the block anyway, -// so it's a caller's responsibility to remove the block's directory. -func (w *streamedBlockWriter) finalize() error { if err := w.writeLabelSets(); err != nil { return errors.Wrap(err, "write label sets") } @@ -195,6 +189,18 @@ func (w *streamedBlockWriter) finalize() error { return errors.Wrap(err, "write mem postings") } + for _, cl := range w.closers { + merr.Add(cl.Close()) + } + + if err := block.WriteIndexCache( + w.logger, + filepath.Join(w.blockDir, block.IndexFilename), + filepath.Join(w.blockDir, block.IndexCacheFilename), + ); err != nil { + return errors.Wrap(err, "write index cache") + } + if err := w.writeMetaFile(); err != nil { return errors.Wrap(err, "write meta file") } @@ -203,8 +209,14 @@ return errors.Wrap(err, "sync blockDir") } + if err := merr.Err(); err != nil { + return errors.Wrap(err, "finalize") + } + + // No error, claim success. + level.Info(w.logger).Log( - "msg", "write downsampled block", + "msg", "finalized downsampled block", "mint", w.meta.MinTime, "maxt", w.meta.MaxTime, "ulid", w.meta.ULID, @@ -220,7 +232,7 @@ func (w *streamedBlockWriter) syncDir() (err error) { return errors.Wrap(err, "open temporary block blockDir") } - defer runutil.CloseWithErrCapture(w.logger, &err, df, "close temporary block blockDir") + defer runutil.CloseWithErrCapture(&err, df, "close temporary block blockDir") if err := fileutil.Fsync(df); err != nil { return errors.Wrap(err, "sync temporary blockDir") diff --git a/pkg/discovery/dns/miekgdns/lookup.go b/pkg/discovery/dns/miekgdns/lookup.go new file mode 100644 index 0000000000..831eae64d7 --- /dev/null +++ b/pkg/discovery/dns/miekgdns/lookup.go @@ -0,0 +1,149 @@ +package miekgdns + +import ( + "bytes" + "net" + + "github.com/miekg/dns" + "github.com/pkg/errors" +) + +// Copied and slightly adjusted from Prometheus DNS SD: +// https://github.com/prometheus/prometheus/blob/be3c082539d85908ce03b6d280f83343e7c930eb/discovery/dns/dns.go#L212 + +// lookupWithSearchPath tries to get an answer for various permutations of +// the given name, appending the system-configured search path as necessary. +// +// There are three possible outcomes: +// +// 1. One of the permutations of the given name is recognized as +// "valid" by the DNS, in which case we consider ourselves "done" +// and that answer is returned. Note that, due to the way the DNS +// handles "name has resource records, but none of the specified type", +// the answer received may have an empty set of results. +// +// 2.
All of the permutations of the given name are responded to by one of +// the servers in the "nameservers" list with the answer "that name does +// not exist" (NXDOMAIN). In that case, it can be considered +// pseudo-authoritative that there are no records for that name. +// +// 3. One or more of the names was responded to by all servers with some +// sort of error indication. In that case, we can't know if, in fact, +// there are records for the name or not, so whatever state the +// configuration is in, we should keep it that way until we know for +// sure (by, presumably, all the names getting answers in the future). +// +// Outcomes 1 and 2 are indicated by a valid response message (possibly an +// empty one) and no error. Outcome 3 is indicated by an error return. The +// error will be generic-looking, because trying to return all the errors +// returned by the combination of all name permutations and servers is a +// nightmare. +func (r *Resolver) lookupWithSearchPath(name string, qtype dns.Type) (*dns.Msg, error) { + conf, err := dns.ClientConfigFromFile(r.ResolvConf) + if err != nil { + return nil, errors.Wrap(err, "could not load resolv.conf") + } + + var errs []error + for _, lname := range conf.NameList(name) { + response, err := lookupFromAnyServer(lname, qtype, conf) + if err != nil { + // We can't go home yet, because a later name + // may give us a valid, successful answer. However + // we can no longer say "this name definitely doesn't + // exist", because we did not get that answer for + // at least one name. + errs = append(errs, err) + continue + } + + if response.Rcode == dns.RcodeSuccess { + // Outcome 1: GOLD! + return response, nil + } + } + + if len(errs) == 0 { + // Outcome 2: everyone says NXDOMAIN. + return &dns.Msg{}, nil + } + // Outcome 3: boned. + return nil, errors.Errorf("could not resolve %q: all servers responded with errors to at least one search domain. Errs %s", name, fmtErrs(errs)) +} + +// lookupFromAnyServer uses all configured servers to try and resolve a specific +// name. If a viable answer is received from a server, then it is +// immediately returned, otherwise the other servers in the config are +// tried, and if none of them return a viable answer, an error is returned. +// +// A "viable answer" is one which indicates either: +// +// 1. "yes, I know that name, and here are its records of the requested type" +// (RCODE==SUCCESS, ANCOUNT > 0); +// 2. "yes, I know that name, but it has no records of the requested type" +// (RCODE==SUCCESS, ANCOUNT==0); or +// 3. "I know that name doesn't exist" (RCODE==NXDOMAIN). +// +// A non-viable answer is "anything else", which encompasses both various +// system-level problems (like network timeouts) and also +// valid-but-unexpected DNS responses (SERVFAIL, REFUSED, etc). +func lookupFromAnyServer(name string, qtype dns.Type, conf *dns.ClientConfig) (*dns.Msg, error) { + client := &dns.Client{} + + var errs []error + + // TODO(bwplotka): Worth doing a fanout and grabbing the fastest response, as the Golang native lib does? + for _, server := range conf.Servers { + servAddr := net.JoinHostPort(server, conf.Port) + msg, err := askServerForName(name, qtype, client, servAddr, true) + if err != nil { + errs = append(errs, errors.Wrapf(err, "resolution against server %s for %s", server, name)) + continue + } + + if msg.Rcode == dns.RcodeSuccess || msg.Rcode == dns.RcodeNameError { + return msg, nil + } + } + + return nil, errors.Errorf("could not resolve %s: no servers returned a viable answer.
Errs %v", name, fmtErrs(errs)) +} + +func fmtErrs(errs []error) string { + b := bytes.Buffer{} + for _, err := range errs { + b.WriteString(";") + b.WriteString(err.Error()) + } + return b.String() +} + +// askServerForName makes a request to a specific DNS server for a specific +// name (and qtype). Retries with TCP in the event of response truncation, +// but otherwise just sends back whatever the server gave, whether that be a +// valid-looking response, or an error. +func askServerForName(name string, qType dns.Type, client *dns.Client, servAddr string, edns bool) (*dns.Msg, error) { + msg := &dns.Msg{} + + msg.SetQuestion(dns.Fqdn(name), uint16(qType)) + if edns { + msg.SetEdns0(dns.DefaultMsgSize, false) + } + + response, _, err := client.Exchange(msg, servAddr) + if err != nil { + return nil, errors.Wrapf(err, "exchange") + } + + if response.Truncated { + if client.Net == "tcp" { + return nil, errors.Errorf("got truncated message on TCP (64kiB limit exceeded?)") + } + + // TCP fallback. + client.Net = "tcp" + return askServerForName(name, qType, client, servAddr, false) + } + + return response, nil +} diff --git a/pkg/discovery/dns/miekgdns/resolver.go b/pkg/discovery/dns/miekgdns/resolver.go new file mode 100644 index 0000000000..06b0b25a1a --- /dev/null +++ b/pkg/discovery/dns/miekgdns/resolver.go @@ -0,0 +1,71 @@ +package miekgdns + +import ( + "context" + "net" + + "github.com/miekg/dns" + "github.com/pkg/errors" +) + +// DefaultResolvConfPath is a common, default resolv.conf file present on linux server. +const DefaultResolvConfPath = "/etc/resolv.conf" + +// Resolver is a drop-in Resolver for *part* of std lib Golang net.DefaultResolver methods. +type Resolver struct { + ResolvConf string +} + +func (r *Resolver) LookupSRV(ctx context.Context, service, proto, name string) (cname string, addrs []*net.SRV, err error) { + var target string + if service == "" && proto == "" { + target = name + } else { + target = "_" + service + "._" + proto + "." + name + } + + response, err := r.lookupWithSearchPath(target, dns.Type(dns.TypeSRV)) + if err != nil { + return "", nil, err + } + + for _, record := range response.Answer { + switch addr := record.(type) { + case *dns.SRV: + addrs = append(addrs, &net.SRV{ + Weight: addr.Weight, + Target: addr.Target, + Priority: addr.Priority, + Port: addr.Port, + }) + default: + return "", nil, errors.Errorf("invalid SRV response record %s", record) + } + } + + return "", addrs, nil +} + +func (r *Resolver) LookupIPAddr(ctx context.Context, host string) ([]net.IPAddr, error) { + response, err := r.lookupWithSearchPath(host, dns.Type(dns.TypeAAAA)) + if err != nil || len(response.Answer) == 0 { + // Ugly fallback to A lookup. 
+ response, err = r.lookupWithSearchPath(host, dns.Type(dns.TypeA)) + if err != nil { + return nil, err + } + } + + var resp []net.IPAddr + for _, record := range response.Answer { + switch addr := record.(type) { + case *dns.A: + resp = append(resp, net.IPAddr{IP: addr.A}) + case *dns.AAAA: + resp = append(resp, net.IPAddr{IP: addr.AAAA}) + default: + return nil, errors.Errorf("invalid A or AAAA response record %s", record) + } + } + return resp, nil +} diff --git a/pkg/discovery/dns/provider.go b/pkg/discovery/dns/provider.go index 7d1e1357c1..c1380ec2ae 100644 --- a/pkg/discovery/dns/provider.go +++ b/pkg/discovery/dns/provider.go @@ -2,11 +2,13 @@ package dns import ( "context" + "net" "strings" "sync" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" + "github.com/improbable-eng/thanos/pkg/discovery/dns/miekgdns" "github.com/prometheus/client_golang/prometheus" ) @@ -22,19 +24,41 @@ type Provider struct { resolverFailuresCount prometheus.Counter } -// NewProvider returns a new empty provider with a default resolver. -func NewProvider(logger log.Logger, reg prometheus.Registerer) *Provider { +type ResolverType string + +const ( + GolangResolverType ResolverType = "golang" + MiekgdnsResolverType ResolverType = "miekgdns" +) + +func (t ResolverType) ToResolver(logger log.Logger) ipLookupResolver { + var r ipLookupResolver + switch t { + case GolangResolverType: + r = net.DefaultResolver + case MiekgdnsResolverType: + r = &miekgdns.Resolver{ResolvConf: miekgdns.DefaultResolvConfPath} + default: + level.Warn(logger).Log("msg", "no such resolver type, defaulting to golang", "type", t) + r = net.DefaultResolver + } + return r +} + +// NewProvider returns a new empty provider with a given resolver type. +// If the resolver type is empty, net.DefaultResolver is used. +func NewProvider(logger log.Logger, reg prometheus.Registerer, resolverType ResolverType) *Provider { p := &Provider{ - resolver: NewResolver(), + resolver: NewResolver(resolverType.ToResolver(logger)), resolved: make(map[string][]string), logger: logger, resolverLookupsCount: prometheus.NewCounter(prometheus.CounterOpts{ - Name: "dns_lookups_total", - Help: "The number of DNS lookups resolutions attempts", + Name: "dns_lookups_total", + Help: "The number of DNS lookup resolution attempts", }), resolverFailuresCount: prometheus.NewCounter(prometheus.CounterOpts{ - Name: "dns_failures_total", - Help: "The number of DNS lookup failures", + Name: "dns_failures_total", + Help: "The number of DNS lookup failures", }), } diff --git a/pkg/discovery/dns/provider_test.go b/pkg/discovery/dns/provider_test.go index 1856e5b690..6589522b38 100644 --- a/pkg/discovery/dns/provider_test.go +++ b/pkg/discovery/dns/provider_test.go @@ -20,7 +20,7 @@ func TestProvider(t *testing.T) { "127.0.0.5:19095", } - prv := NewProvider(log.NewNopLogger(), nil) + prv := NewProvider(log.NewNopLogger(), nil, "") prv.resolver = &mockResolver{ res: map[string][]string{ "a": ips[:2], diff --git a/pkg/discovery/dns/resolver.go b/pkg/discovery/dns/resolver.go index 7d3c593d5f..709e264e57 100644 --- a/pkg/discovery/dns/resolver.go +++ b/pkg/discovery/dns/resolver.go @@ -12,17 +12,18 @@ import ( type QType string const ( - A = QType("dns") - SRV = QType("dnssrv") + // A qtype performs A/AAAA lookup. + A = QType("dns") + // SRV qtype performs SRV lookup with A/AAAA lookup for each SRV result. + SRV = QType("dnssrv") + // SRVNoA qtype performs SRV lookup without any A/AAAA lookup for each SRV result.
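Taken together, these changes make the resolver backend a constructor-time choice. A hypothetical wiring sketch (the service name is invented; NewResolver, ToResolver, and the qtypes are the identifiers introduced above, with context, go-kit log, and the dns package imports assumed):

    // resolveStoreEndpoints resolves a store's gRPC endpoints with the
    // miekg/dns-backed resolver, keeping SRV targets unresolved (the new
    // dnssrvnoa behaviour).
    func resolveStoreEndpoints(ctx context.Context, logger log.Logger) ([]string, error) {
    	r := dns.NewResolver(dns.MiekgdnsResolverType.ToResolver(logger))
    	return r.Resolve(ctx, "_grpc._tcp.store.example.com", dns.SRVNoA)
    }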
SRVNoA = QType("dnssrvnoa") ) type Resolver interface { - // Resolve performs a DNS lookup and returns a list of records. // name is the domain name to be resolved. // qtype is the query type. Accepted values are `dns` for A/AAAA lookup and `dnssrv` for SRV lookup. - // If qtype is `dns`, the domain name to be resolved requires a port or an error will be returned. // If scheme is passed through name, it is preserved on IP results. Resolve(ctx context.Context, name string, qtype QType) ([]string, error) } @@ -36,9 +37,9 @@ type dnsSD struct { resolver ipLookupResolver } -// NewResolver provides a resolver with a specific net.Resolver. If resolver is nil, the default resolver will be used. -func NewResolver() Resolver { - return &dnsSD{resolver: net.DefaultResolver} +// NewResolver creates a resolver with given underlying resolver. +func NewResolver(resolver ipLookupResolver) Resolver { + return &dnsSD{resolver: resolver} } func (s *dnsSD) Resolve(ctx context.Context, name string, qtype QType) ([]string, error) { @@ -72,17 +73,23 @@ func (s *dnsSD) Resolve(ctx context.Context, name string, qtype QType) ([]string for _, ip := range ips { res = append(res, appendScheme(scheme, net.JoinHostPort(ip.String(), port))) } - case SRV: + case SRV, SRVNoA: _, recs, err := s.resolver.LookupSRV(ctx, "", "", host) if err != nil { return nil, errors.Wrapf(err, "lookup SRV records %q", host) } + for _, rec := range recs { // Only use port from SRV record if no explicit port was specified. resPort := port if resPort == "" { resPort = strconv.Itoa(int(rec.Port)) } + + if qtype == SRVNoA { + res = append(res, appendScheme(scheme, net.JoinHostPort(rec.Target, resPort))) + continue + } // Do A lookup for the domain in SRV answer. resIPs, err := s.resolver.LookupIPAddr(ctx, rec.Target) if err != nil { @@ -92,19 +99,6 @@ func (s *dnsSD) Resolve(ctx context.Context, name string, qtype QType) ([]string res = append(res, appendScheme(scheme, net.JoinHostPort(resIP.String(), resPort))) } } - case SRVNoA: - _, recs, err := s.resolver.LookupSRV(ctx, "", "", host) - if err != nil { - return nil, errors.Wrapf(err, "lookup SRV records %q", host) - } - for _, rec := range recs { - // Only use port from SRV record if no explicit port was specified. 
- resPort := port - if resPort == "" { - resPort = strconv.Itoa(int(rec.Port)) - } - res = append(res, appendScheme(scheme, net.JoinHostPort(rec.Target, resPort))) - } default: return nil, errors.Errorf("invalid lookup scheme %q", qtype) } diff --git a/pkg/discovery/dns/resolver_test.go b/pkg/discovery/dns/resolver_test.go index 11d2986f5e..2489d027a0 100644 --- a/pkg/discovery/dns/resolver_test.go +++ b/pkg/discovery/dns/resolver_test.go @@ -185,10 +185,10 @@ func testDnsSd(t *testing.T, tt DNSSDTest) { result, err := dnsSD.Resolve(ctx, tt.addr, tt.qtype) if tt.expectedErr != nil { - testutil.Assert(t, err != nil, "expected error but none was returned") + testutil.NotOk(t, err) testutil.Assert(t, tt.expectedErr.Error() == err.Error(), "expected error '%v', but got '%v'", tt.expectedErr.Error(), err.Error()) } else { - testutil.Assert(t, err == nil, "expected no error but got %v", err) + testutil.Ok(t, err) } sort.Strings(result) testutil.Equals(t, tt.expectedResult, result) diff --git a/pkg/objstore/azure/azure.go b/pkg/objstore/azure/azure.go index d0c50524c8..266e3f55f7 100644 --- a/pkg/objstore/azure/azure.go +++ b/pkg/objstore/azure/azure.go @@ -18,11 +18,7 @@ import ( ) const ( - opObjectsList = "ListBucket" - opObjectInsert = "PutObject" - opObjectGet = "GetObject" - opObjectHead = "HeadObject" - opObjectDelete = "DeleteObject" + azureDefaultEndpoint = "blob.core.windows.net" ) // Config Azure storage configuration. @@ -30,6 +26,7 @@ type Config struct { StorageAccountName string `yaml:"storage_account"` StorageAccountKey string `yaml:"storage_account_key"` ContainerName string `yaml:"container"` + Endpoint string `yaml:"endpoint"` } // Bucket implements the store.Bucket interface against Azure APIs. @@ -45,6 +42,18 @@ func (conf *Config) validate() error { conf.StorageAccountKey == "" { return errors.New("invalid Azure storage configuration") } + if conf.StorageAccountName == "" && conf.StorageAccountKey != "" { + return errors.New("no Azure storage_account specified while storage_account_key is present in config file; both should be present.") + } + if conf.StorageAccountName != "" && conf.StorageAccountKey == "" { + return errors.New("no Azure storage_account_key specified while storage_account is present in config file; both should be present.") + } + if conf.ContainerName == "" { + return errors.New("no Azure container specified") + } + if conf.Endpoint == "" { + conf.Endpoint = azureDefaultEndpoint + } return nil } @@ -62,7 +71,7 @@ func NewBucket(logger log.Logger, azureConfig []byte, component string) (*Bucket } ctx := context.Background() - container, err := createContainer(ctx, conf.StorageAccountName, conf.StorageAccountKey, conf.ContainerName) + container, err := createContainer(ctx, conf) if err != nil { ret, ok := err.(blob.StorageError) if !ok { @@ -70,7 +79,7 @@ func NewBucket(logger log.Logger, azureConfig []byte, component string) (*Bucket } if ret.ServiceCode() == "ContainerAlreadyExists" { level.Debug(logger).Log("msg", "Getting connection to existing Azure blob container", "container", conf.ContainerName) - container, err = getContainer(ctx, conf.StorageAccountName, conf.StorageAccountKey, conf.ContainerName) + container, err = getContainer(ctx, conf) if err != nil { return nil, errors.Wrapf(err, "cannot get existing Azure blob container: %s", container) } @@ -166,7 +175,7 @@ func (b *Bucket) getBlobReader(ctx context.Context, name string, offset, length return nil, errors.New("X-Ms-Error-Code: [BlobNotFound]") } - blobURL, err := getBlobURL(ctx, 
b.config.StorageAccountName, b.config.StorageAccountKey, b.config.ContainerName, name) + blobURL, err := getBlobURL(ctx, *b.config, name) if err != nil { return nil, errors.Wrapf(err, "cannot get Azure blob URL, address: %s", name) } @@ -211,7 +220,7 @@ func (b *Bucket) GetRange(ctx context.Context, name string, off, length int64) ( // Exists checks if the given object exists. func (b *Bucket) Exists(ctx context.Context, name string) (bool, error) { level.Debug(b.logger).Log("msg", "check if blob exists", "blob", name) - blobURL, err := getBlobURL(ctx, b.config.StorageAccountName, b.config.StorageAccountKey, b.config.ContainerName, name) + blobURL, err := getBlobURL(ctx, *b.config, name) if err != nil { return false, errors.Wrapf(err, "cannot get Azure blob URL, address: %s", name) } @@ -229,7 +238,7 @@ func (b *Bucket) Exists(ctx context.Context, name string) (bool, error) { // Upload the contents of the reader as an object into the bucket. func (b *Bucket) Upload(ctx context.Context, name string, r io.Reader) error { level.Debug(b.logger).Log("msg", "Uploading blob", "blob", name) - blobURL, err := getBlobURL(ctx, b.config.StorageAccountName, b.config.StorageAccountKey, b.config.ContainerName, name) + blobURL, err := getBlobURL(ctx, *b.config, name) if err != nil { return errors.Wrapf(err, "cannot get Azure blob URL, address: %s", name) } @@ -247,7 +256,7 @@ func (b *Bucket) Upload(ctx context.Context, name string, r io.Reader) error { // Delete removes the object with the given name. func (b *Bucket) Delete(ctx context.Context, name string) error { level.Debug(b.logger).Log("msg", "Deleting blob", "blob", name) - blobURL, err := getBlobURL(ctx, b.config.StorageAccountName, b.config.StorageAccountKey, b.config.ContainerName, name) + blobURL, err := getBlobURL(ctx, *b.config, name) if err != nil { return errors.Wrapf(err, "cannot get Azure blob URL, address: %s", name) } diff --git a/pkg/objstore/azure/azure_test.go b/pkg/objstore/azure/azure_test.go new file mode 100644 index 0000000000..2574896213 --- /dev/null +++ b/pkg/objstore/azure/azure_test.go @@ -0,0 +1,87 @@ +package azure + +import ( + "testing" + + "github.com/improbable-eng/thanos/pkg/testutil" +) + +func TestConfig_validate(t *testing.T) { + type fields struct { + StorageAccountName string + StorageAccountKey string + ContainerName string + Endpoint string + } + tests := []struct { + name string + fields fields + wantErr bool + wantEndpoint string + }{ + { + name: "valid global configuration", + fields: fields{ + StorageAccountName: "foo", + StorageAccountKey: "bar", + ContainerName: "roo", + }, + wantErr: false, + wantEndpoint: azureDefaultEndpoint, + }, + { + name: "valid custom endpoint", + fields: fields{ + StorageAccountName: "foo", + StorageAccountKey: "bar", + ContainerName: "roo", + Endpoint: "blob.core.chinacloudapi.cn", + }, + wantErr: false, + wantEndpoint: "blob.core.chinacloudapi.cn", + }, + { + name: "no account key but account name", + fields: fields{ + StorageAccountName: "foo", + StorageAccountKey: "", + ContainerName: "roo", + }, + wantErr: true, + }, + { + name: "no account name but account key", + fields: fields{ + StorageAccountName: "", + StorageAccountKey: "bar", + ContainerName: "roo", + }, + wantErr: true, + }, + { + name: "no container name", + fields: fields{ + StorageAccountName: "foo", + StorageAccountKey: "bar", + ContainerName: "", + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + conf := &Config{ + StorageAccountName: 
tt.fields.StorageAccountName, + StorageAccountKey: tt.fields.StorageAccountKey, + ContainerName: tt.fields.ContainerName, + Endpoint: tt.fields.Endpoint, + } + err := conf.validate() + if (err != nil) != tt.wantErr { + t.Errorf("Config.validate() error = %v, wantErr %v", err, tt.wantErr) + } else { + testutil.Equals(t, tt.wantEndpoint, conf.Endpoint) + } + }) + } +} diff --git a/pkg/objstore/azure/helpers.go b/pkg/objstore/azure/helpers.go index da6e96f89c..4e16c31c0b 100644 --- a/pkg/objstore/azure/helpers.go +++ b/pkg/objstore/azure/helpers.go @@ -5,36 +5,42 @@ import ( "fmt" "net/url" "regexp" + "time" blob "github.com/Azure/azure-storage-blob-go/azblob" ) -var ( - blobFormatString = `https://%s.blob.core.windows.net` -) - // DirDelim is the delimiter used to model a directory structure in an object store bucket. const DirDelim = "/" -func getContainerURL(ctx context.Context, accountName, accountKey, containerName string) (blob.ContainerURL, error) { - c, err := blob.NewSharedKeyCredential(accountName, accountKey) +var errorCodeRegex = regexp.MustCompile(`X-Ms-Error-Code:\D*\[(\w+)\]`) + +func getContainerURL(ctx context.Context, conf Config) (blob.ContainerURL, error) { + c, err := blob.NewSharedKeyCredential(conf.StorageAccountName, conf.StorageAccountKey) if err != nil { return blob.ContainerURL{}, err } + + retryOptions := blob.RetryOptions{} + if deadline, ok := ctx.Deadline(); ok { + retryOptions.TryTimeout = deadline.Sub(time.Now()) + } + p := blob.NewPipeline(c, blob.PipelineOptions{ + Retry: retryOptions, Telemetry: blob.TelemetryOptions{Value: "Thanos"}, }) - u, err := url.Parse(fmt.Sprintf(blobFormatString, accountName)) + u, err := url.Parse(fmt.Sprintf("https://%s.%s", conf.StorageAccountName, conf.Endpoint)) if err != nil { return blob.ContainerURL{}, err } service := blob.NewServiceURL(*u, p) - return service.NewContainerURL(containerName), nil + return service.NewContainerURL(conf.ContainerName), nil } -func getContainer(ctx context.Context, accountName, accountKey, containerName string) (blob.ContainerURL, error) { - c, err := getContainerURL(ctx, accountName, accountKey, containerName) +func getContainer(ctx context.Context, conf Config) (blob.ContainerURL, error) { + c, err := getContainerURL(ctx, conf) if err != nil { return blob.ContainerURL{}, err } @@ -43,20 +49,20 @@ func getContainer(ctx context.Context, accountName, accountKey, containerName st return c, err } -func createContainer(ctx context.Context, accountName, accountKey, containerName string) (blob.ContainerURL, error) { - c, err := getContainerURL(ctx, accountName, accountKey, containerName) +func createContainer(ctx context.Context, conf Config) (blob.ContainerURL, error) { + c, err := getContainerURL(ctx, conf) if err != nil { return blob.ContainerURL{}, err } _, err = c.Create( - context.Background(), + ctx, blob.Metadata{}, blob.PublicAccessNone) return c, err } -func getBlobURL(ctx context.Context, accountName, accountKey, containerName, blobName string) (blob.BlockBlobURL, error) { - c, err := getContainerURL(ctx, accountName, accountKey, containerName) +func getBlobURL(ctx context.Context, conf Config, blobName string) (blob.BlockBlobURL, error) { + c, err := getContainerURL(ctx, conf) if err != nil { return blob.BlockBlobURL{}, err } @@ -64,8 +70,7 @@ func getBlobURL(ctx context.Context, accountName, accountKey, containerName, blo } func parseError(errorCode string) string { - re, _ := regexp.Compile(`X-Ms-Error-Code:\D*\[(\w+)\]`) - match := re.FindStringSubmatch(errorCode) + match := 
errorCodeRegex.FindStringSubmatch(errorCode) if match != nil && len(match) == 2 { return match[1] } diff --git a/pkg/objstore/azure/helpers_test.go b/pkg/objstore/azure/helpers_test.go new file mode 100644 index 0000000000..84d47f4711 --- /dev/null +++ b/pkg/objstore/azure/helpers_test.go @@ -0,0 +1,58 @@ +package azure + +import ( + "context" + "testing" + + "github.com/improbable-eng/thanos/pkg/testutil" +) + +func Test_getContainerURL(t *testing.T) { + type args struct { + conf Config + } + tests := []struct { + name string + args args + want string + wantErr bool + }{ + { + name: "default", + args: args{ + conf: Config{ + StorageAccountName: "foo", + StorageAccountKey: "Zm9vCg==", + ContainerName: "roo", + Endpoint: azureDefaultEndpoint, + }, + }, + want: "https://foo.blob.core.windows.net/roo", + wantErr: false, + }, + { + name: "azure china", + args: args{ + conf: Config{ + StorageAccountName: "foo", + StorageAccountKey: "Zm9vCg==", + ContainerName: "roo", + Endpoint: "blob.core.chinacloudapi.cn", + }, + }, + want: "https://foo.blob.core.chinacloudapi.cn/roo", + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + got, err := getContainerURL(ctx, tt.args.conf) + if (err != nil) != tt.wantErr { + t.Errorf("getContainerURL() error = %v, wantErr %v", err, tt.wantErr) + return + } + testutil.Equals(t, tt.want, got.String()) + }) + } +} diff --git a/pkg/objstore/gcs/gcs.go b/pkg/objstore/gcs/gcs.go index 7743b9449f..e4f6a7295a 100644 --- a/pkg/objstore/gcs/gcs.go +++ b/pkg/objstore/gcs/gcs.go @@ -54,7 +54,7 @@ func NewBucket(ctx context.Context, logger log.Logger, conf []byte, component st // If ServiceAccount is provided, use them in GCS client, otherwise fallback to Google default logic. if gc.ServiceAccount != "" { - credentials, err := google.CredentialsFromJSON(ctx, []byte(gc.ServiceAccount)) + credentials, err := google.CredentialsFromJSON(ctx, []byte(gc.ServiceAccount), storage.ScopeFullControl) if err != nil { return nil, errors.Wrap(err, "failed to create credentials from JSON") } diff --git a/pkg/objstore/objstore.go b/pkg/objstore/objstore.go index 4b241e972a..81d52d7d45 100644 --- a/pkg/objstore/objstore.go +++ b/pkg/objstore/objstore.go @@ -106,7 +106,7 @@ func DeleteDir(ctx context.Context, bkt Bucket, dir string) error { // DownloadFile downloads the src file from the bucket to dst. If dst is an existing // directory, a file with the same name as the source is created in dst. // If destination file is already existing, download file will overwrite it. 
-func DownloadFile(ctx context.Context, logger log.Logger, bkt BucketReader, src, dst string) error { +func DownloadFile(ctx context.Context, logger log.Logger, bkt BucketReader, src, dst string) (err error) { if fi, err := os.Stat(dst); err == nil { if fi.IsDir() { dst = filepath.Join(dst, filepath.Base(src)) @@ -125,8 +125,6 @@ func DownloadFile(ctx context.Context, logger log.Logger, bkt BucketReader, src, if err != nil { return errors.Wrap(err, "create file") } - defer runutil.CloseWithLogOnErr(logger, f, "download block's output file") - defer func() { if err != nil { if rerr := os.Remove(dst); rerr != nil { @@ -134,6 +132,8 @@ func DownloadFile(ctx context.Context, logger log.Logger, bkt BucketReader, src, } } }() + defer runutil.CloseWithLogOnErr(logger, f, "download block's output file") + if _, err = io.Copy(f, rc); err != nil { return errors.Wrap(err, "copy object to file") } @@ -170,6 +170,23 @@ func DownloadDir(ctx context.Context, logger log.Logger, bkt BucketReader, src, return nil } +// Exists returns true if the file exists. It returns false with a nil error when the object is not found, and +// false with a non-nil error for any other failure. +func Exists(ctx context.Context, bkt Bucket, src string) (bool, error) { + rc, err := bkt.Get(ctx, src) + if rc != nil { + _ = rc.Close() + } + if err != nil { + if bkt.IsObjNotFoundErr(err) { + return false, nil + } + return false, errors.Wrap(err, "stat object") + } + + return true, nil +} + // BucketWithMetrics takes a bucket and registers metrics with the given registry for // operations run against the bucket. func BucketWithMetrics(name string, b Bucket, r prometheus.Registerer) Bucket { diff --git a/pkg/objstore/s3/s3.go b/pkg/objstore/s3/s3.go index c0fe3b98d9..115b6c4c20 100644 --- a/pkg/objstore/s3/s3.go +++ b/pkg/objstore/s3/s3.go @@ -37,6 +37,7 @@ const DirDelim = "/" type Config struct { Bucket string `yaml:"bucket"` Endpoint string `yaml:"endpoint"` + Region string `yaml:"region"` AccessKey string `yaml:"access_key"` Insecure bool `yaml:"insecure"` SignatureV2 bool `yaml:"signature_version2"` @@ -122,7 +123,7 @@ func NewBucketWithConfig(logger log.Logger, config Config, component string) (*B } } - client, err := minio.NewWithCredentials(config.Endpoint, credentials.NewChainCredentials(chain), !config.Insecure, "") + client, err := minio.NewWithCredentials(config.Endpoint, credentials.NewChainCredentials(chain), !config.Insecure, config.Region) if err != nil { return nil, errors.Wrap(err, "initialize s3 client") } diff --git a/pkg/promclient/promclient.go b/pkg/promclient/promclient.go index 1622a16340..efc35bbafe 100644 --- a/pkg/promclient/promclient.go +++ b/pkg/promclient/promclient.go @@ -23,6 +23,7 @@ import ( "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" + "github.com/hashicorp/go-version" "github.com/improbable-eng/thanos/pkg/runutil" "github.com/improbable-eng/thanos/pkg/store/storepb" "github.com/improbable-eng/thanos/pkg/tracing" @@ -35,6 +36,8 @@ import ( yaml "gopkg.in/yaml.v2" ) +var FlagsVersion = version.Must(version.NewVersion("2.2.0")) + // IsWALFileAccesible returns no error if WAL dir can be found. This helps to tell // if we have access to Prometheus TSDB directory.
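A short usage sketch for the new Exists helper (illustrative only; hasMeta is a hypothetical helper, while block.MetaFilename is the same constant the compactor change uses when probing for malformed blocks):

    // hasMeta reports whether the given block has uploaded its meta.json yet;
    // a false result with a nil error means the object is genuinely absent.
    func hasMeta(ctx context.Context, bkt objstore.Bucket, id ulid.ULID) (bool, error) {
    	return objstore.Exists(ctx, bkt, path.Join(id.String(), block.MetaFilename))
    }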
func IsWALDirAccesible(dir string) error { @@ -70,7 +73,7 @@ func ExternalLabels(ctx context.Context, logger log.Logger, base *url.URL) (labe b, err := ioutil.ReadAll(resp.Body) if err != nil { - return nil, errors.Errorf("failed to read body") + return nil, errors.New("failed to read body") } if resp.StatusCode != 200 { @@ -186,7 +189,7 @@ func ConfiguredFlags(ctx context.Context, logger log.Logger, base *url.URL) (Fla b, err := ioutil.ReadAll(resp.Body) if err != nil { - return Flags{}, errors.Errorf("failed to read body") + return Flags{}, errors.New("failed to read body") } if resp.StatusCode != 200 { @@ -230,7 +233,7 @@ func Snapshot(ctx context.Context, logger log.Logger, base *url.URL, skipHead bo b, err := ioutil.ReadAll(resp.Body) if err != nil { - return "", errors.Errorf("failed to read body") + return "", errors.New("failed to read body") } if resp.StatusCode != 200 { @@ -376,6 +379,32 @@ func PromqlQueryInstant(ctx context.Context, logger log.Logger, base *url.URL, q return vec, warnings, nil } +// PromVersion returns the version of Prometheus by querying its /version endpoint. +func PromVersion(logger log.Logger, base *url.URL) (*version.Version, error) { + if logger == nil { + logger = log.NewNopLogger() + } + + u := *base + u.Path = path.Join(u.Path, "/version") + resp, err := http.Get(u.String()) + if err != nil { + return nil, errors.Wrapf(err, "request version against %s", u.String()) + } + defer runutil.CloseWithLogOnErr(logger, resp.Body, "query body") + + b, err := ioutil.ReadAll(resp.Body) + if err != nil { + return nil, errors.New("failed to read body") + } + + if resp.StatusCode != 200 { + return nil, errors.Errorf("got non-200 response code: %v, response: %v", resp.StatusCode, string(b)) + } + + return parseVersion(b) +} + // Scalar response consists of array with mixed types so it needs to be // unmarshaled separately. func convertScalarJSONToVector(scalarJSONResult json.RawMessage) (model.Vector, error) { @@ -478,3 +507,25 @@ func MetricValues(ctx context.Context, logger log.Logger, base *url.URL, perMetr } } } + +// parseVersion converts a version string to a version.Version. +func parseVersion(data []byte) (*version.Version, error) { + var m struct { + Version string `json:"version"` + } + if err := json.Unmarshal(data, &m); err != nil { + return nil, errors.Wrapf(err, "unmarshal response: %v", string(data)) + } + + // Prometheus can be built without a version set; return nil in that case.
+ if strings.TrimSpace(m.Version) == "" { + return nil, nil + } + + ver, err := version.NewVersion(m.Version) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse version %s", m.Version) + } + + return ver, nil +} diff --git a/pkg/promclient/promclient_e2e_test.go b/pkg/promclient/promclient_e2e_test.go index 1cb7c968af..c2b35fdca6 100644 --- a/pkg/promclient/promclient_e2e_test.go +++ b/pkg/promclient/promclient_e2e_test.go @@ -11,6 +11,7 @@ import ( "time" "github.com/go-kit/kit/log" + "github.com/hashicorp/go-version" "github.com/improbable-eng/thanos/pkg/runutil" "github.com/improbable-eng/thanos/pkg/testutil" "github.com/oklog/ulid" @@ -72,8 +73,8 @@ func TestConfiguredFlags_e2e(t *testing.T) { testutil.Assert(t, !flags.WebEnableLifecycle, "") testutil.Equals(t, p.Dir(), flags.TSDBPath) testutil.Equals(t, int64(2*time.Hour), int64(flags.TSDBMinTime)) - testutil.Equals(t, int64(36*time.Hour), int64(flags.TSDBMaxTime)) - testutil.Equals(t, int64(15*24*time.Hour), int64(flags.TSDBRetention)) + testutil.Equals(t, int64(4.8*float64(time.Hour)), int64(flags.TSDBMaxTime)) + testutil.Equals(t, int64(2*24*time.Hour), int64(flags.TSDBRetention)) }) } @@ -81,8 +82,10 @@ func TestSnapshot_e2e(t *testing.T) { testutil.ForeachPrometheus(t, func(t testing.TB, p *testutil.Prometheus) { now := time.Now() + ctx := context.Background() // Create artificial block. id, err := testutil.CreateBlockWithTombstone( + ctx, p.Dir(), []labels.Labels{labels.FromStrings("a", "b")}, 10, @@ -99,7 +102,10 @@ func TestSnapshot_e2e(t *testing.T) { u, err := url.Parse(fmt.Sprintf("http://%s", p.Addr())) testutil.Ok(t, err) - dir, err := Snapshot(context.Background(), log.NewNopLogger(), u, false) + // Prometheus since v2.7.0 doesn't write empty blocks, even for the head block, so it does not matter whether skip_head is passed as true or false here. + // Pass skipHead = true to support all Prometheus versions and assert that the snapshot creates only one file. + // https://github.com/prometheus/tsdb/pull/374 + dir, err := Snapshot(ctx, log.NewNopLogger(), u, true) testutil.Ok(t, err) _, err = os.Stat(path.Join(p.Dir(), dir, id.String())) @@ -113,7 +119,7 @@ func TestSnapshot_e2e(t *testing.T) { testutil.Ok(t, err) } - testutil.Equals(t, 2, len(files)) + testutil.Equals(t, 1, len(files)) }) } @@ -142,3 +148,36 @@ func TestRule_UnmarshalScalarResponse(t *testing.T) { vectorResult, err = convertScalarJSONToVector(invalidDataScalarJSONResult) testutil.NotOk(t, err) } + +func TestParseVersion(t *testing.T) { + promVersions := map[string]string{ + "": promVersionResp(""), + "2.2.0": promVersionResp("2.2.0"), + "2.3.0": promVersionResp("2.3.0"), + "2.3.0-rc.0": promVersionResp("2.3.0-rc.0"), + } + + promMalformedVersions := map[string]string{ + "foo": promVersionResp("foo"), + "bar": promVersionResp("bar"), + } + + for v, resp := range promVersions { + gotVersion, err := parseVersion([]byte(resp)) + testutil.Ok(t, err) + expectVersion, _ := version.NewVersion(v) + testutil.Equals(t, gotVersion, expectVersion) + } + + for v, resp := range promMalformedVersions { + gotVersion, err := parseVersion([]byte(resp)) + testutil.NotOk(t, err) + expectVersion, _ := version.NewVersion(v) + testutil.Equals(t, gotVersion, expectVersion) + } +} + +// promVersionResp returns the response of Prometheus /version endpoint.
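A natural consumer of PromVersion and FlagsVersion is version gating; a hedged sketch of such a call site (not part of this diff; logger and promURL are assumed to be in scope):

    // Illustrative only: skip flag-dependent behaviour when Prometheus is too
    // old or its version is unknown.
    ver, err := promclient.PromVersion(logger, promURL)
    if err != nil {
    	return err
    }
    // ver is nil when Prometheus was built without a version string.
    if ver == nil || ver.LessThan(promclient.FlagsVersion) {
    	level.Warn(logger).Log("msg", "Prometheus version too old or unknown, skipping flags support")
    	return nil
    }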
+func promVersionResp(ver string) string { + return fmt.Sprintf(`{"version":"%s","revision":"","branch":"","buildUser":"","buildDate":"","goVersion":""}`, ver) +} diff --git a/pkg/query/api/v1.go b/pkg/query/api/v1.go index b05a109207..a3908b9db3 100644 --- a/pkg/query/api/v1.go +++ b/pkg/query/api/v1.go @@ -176,6 +176,9 @@ func (api *API) Register(r *route.Router, tracer opentracing.Tracer, logger log. r.Get("/label/:name/values", instr("label_values", api.labelValues)) r.Get("/series", instr("series", api.series)) + r.Post("/series", instr("series", api.series)) + + r.Get("/labels", instr("label_names", api.labelNames)) } type queryData struct { @@ -614,3 +617,35 @@ func parseDuration(s string) (time.Duration, error) { } return 0, fmt.Errorf("cannot parse %q to a valid duration", s) } + +func (api *API) labelNames(r *http.Request) (interface{}, []error, *ApiError) { + ctx := r.Context() + + enablePartialResponse, apiErr := api.parsePartialResponseParam(r) + if apiErr != nil { + return nil, nil, apiErr + } + + var ( + warnmtx sync.Mutex + warnings []error + ) + warningReporter := func(err error) { + warnmtx.Lock() + warnings = append(warnings, err) + warnmtx.Unlock() + } + + q, err := api.queryableCreate(true, 0, enablePartialResponse, warningReporter).Querier(ctx, math.MinInt64, math.MaxInt64) + if err != nil { + return nil, nil, &ApiError{errorExec, err} + } + defer runutil.CloseWithLogOnErr(api.logger, q, "queryable labelNames") + + names, err := q.LabelNames() + if err != nil { + return nil, nil, &ApiError{errorExec, err} + } + + return names, warnings, nil +} diff --git a/pkg/query/api/v1_test.go b/pkg/query/api/v1_test.go index ca5dee2013..3db7803038 100644 --- a/pkg/query/api/v1_test.go +++ b/pkg/query/api/v1_test.go @@ -18,22 +18,23 @@ import ( "encoding/json" "errors" "fmt" + "io" "io/ioutil" "math/rand" "net/http" "net/http/httptest" "net/url" "reflect" + "strings" "testing" "time" - "github.com/prometheus/common/route" - "github.com/go-kit/kit/log" "github.com/improbable-eng/thanos/pkg/query" "github.com/improbable-eng/thanos/pkg/testutil" "github.com/opentracing/opentracing-go" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/route" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/pkg/timestamp" "github.com/prometheus/prometheus/promql" @@ -80,6 +81,7 @@ func TestEndpoints(t *testing.T) { endpoint ApiFunc params map[string]string query url.Values + method string response interface{} errType ErrorType }{ @@ -406,6 +408,133 @@ func TestEndpoints(t *testing.T) { }, errType: errorBadData, }, + { + endpoint: api.series, + query: url.Values{ + "match[]": []string{`test_metric2`}, + }, + response: []labels.Labels{ + labels.FromStrings("__name__", "test_metric2", "foo", "boo"), + }, + method: http.MethodPost, + }, + { + endpoint: api.series, + query: url.Values{ + "match[]": []string{`test_metric1{foo=~".+o"}`}, + }, + response: []labels.Labels{ + labels.FromStrings("__name__", "test_metric1", "foo", "boo"), + }, + method: http.MethodPost, + }, + { + endpoint: api.series, + query: url.Values{ + "match[]": []string{`test_metric1{foo=~".+o$"}`, `test_metric1{foo=~".+o"}`}, + }, + response: []labels.Labels{ + labels.FromStrings("__name__", "test_metric1", "foo", "boo"), + }, + method: http.MethodPost, + }, + { + endpoint: api.series, + query: url.Values{ + "match[]": []string{`test_metric1{foo=~".+o"}`, `none`}, + }, + response: []labels.Labels{ + labels.FromStrings("__name__", "test_metric1", "foo", "boo"), + }, + method: 
http.MethodPost, + }, + // Start and end before series starts. + { + endpoint: api.series, + query: url.Values{ + "match[]": []string{`test_metric2`}, + "start": []string{"-2"}, + "end": []string{"-1"}, + }, + response: []labels.Labels(nil), + }, + // Start and end after series ends. + { + endpoint: api.series, + query: url.Values{ + "match[]": []string{`test_metric2`}, + "start": []string{"100000"}, + "end": []string{"100001"}, + }, + response: []labels.Labels(nil), + }, + // Start before series starts, end after series ends. + { + endpoint: api.series, + query: url.Values{ + "match[]": []string{`test_metric2`}, + "start": []string{"-1"}, + "end": []string{"100000"}, + }, + response: []labels.Labels{ + labels.FromStrings("__name__", "test_metric2", "foo", "boo"), + }, + method: http.MethodPost, + }, + // Start and end within series. + { + endpoint: api.series, + query: url.Values{ + "match[]": []string{`test_metric2`}, + "start": []string{"1"}, + "end": []string{"100"}, + }, + response: []labels.Labels{ + labels.FromStrings("__name__", "test_metric2", "foo", "boo"), + }, + method: http.MethodPost, + }, + // Start within series, end after. + { + endpoint: api.series, + query: url.Values{ + "match[]": []string{`test_metric2`}, + "start": []string{"1"}, + "end": []string{"100000"}, + }, + response: []labels.Labels{ + labels.FromStrings("__name__", "test_metric2", "foo", "boo"), + }, + method: http.MethodPost, + }, + // Start before series, end within series. + { + endpoint: api.series, + query: url.Values{ + "match[]": []string{`test_metric2`}, + "start": []string{"-1"}, + "end": []string{"1"}, + }, + response: []labels.Labels{ + labels.FromStrings("__name__", "test_metric2", "foo", "boo"), + }, + method: http.MethodPost, + }, + // Missing match[] query params in series requests. + { + endpoint: api.series, + errType: errorBadData, + method: http.MethodPost, + }, + { + endpoint: api.series, + query: url.Values{ + "match[]": []string{`test_metric2`}, + "dedup": []string{"sdfsf-series"}, + }, + errType: errorBadData, + method: http.MethodPost, + }, } for _, test := range tests { @@ -416,10 +545,26 @@ func TestEndpoints(t *testing.T) { ctx = route.WithParam(ctx, p, v) } - req, err := http.NewRequest("ANY", fmt.Sprintf("http://example.com?%s", test.query.Encode()), nil) + reqURL := "http://example.com" + params := test.query.Encode() + + var body io.Reader + if test.method == http.MethodPost { + body = strings.NewReader(params) + } else if test.method == "" { + test.method = "ANY" + reqURL += "?" + params + } + + req, err := http.NewRequest(test.method, reqURL, body) if err != nil { t.Fatal(err) } + + if body != nil { + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + } + resp, _, apiErr := test.endpoint(req.WithContext(ctx)) if apiErr != nil { if test.errType == errorNone { diff --git a/pkg/query/querier.go b/pkg/query/querier.go index 819ff3ac2a..07dc3ec71b 100644 --- a/pkg/query/querier.go +++ b/pkg/query/querier.go @@ -265,9 +265,20 @@ func (q *querier) LabelValues(name string) ([]string, error) { } // LabelNames returns all the unique label names present in the block in sorted order. -// TODO(bwplotka): Consider adding labelNames to thanos Query API https://github.com/improbable-eng/thanos/issues/702. 
func (q *querier) LabelNames() ([]string, error) { - return nil, errors.New("not implemented") + span, ctx := tracing.StartSpan(q.ctx, "querier_label_names") + defer span.Finish() + + resp, err := q.proxy.LabelNames(ctx, &storepb.LabelNamesRequest{PartialResponseDisabled: !q.partialResponse}) + if err != nil { + return nil, errors.Wrap(err, "proxy LabelNames()") + } + + for _, w := range resp.Warnings { + q.warningReporter(errors.New(w)) + } + + return resp.Names, nil } func (q *querier) Close() error { diff --git a/pkg/query/storeset.go b/pkg/query/storeset.go index a660e6566e..7f8344116d 100644 --- a/pkg/query/storeset.go +++ b/pkg/query/storeset.go @@ -81,12 +81,13 @@ type StoreSet struct { dialOpts []grpc.DialOption gRPCInfoCallTimeout time.Duration - mtx sync.RWMutex - storesStatusesMtx sync.RWMutex - stores map[string]*storeRef - storeNodeConnections prometheus.Gauge - externalLabelStores map[string]int - storeStatuses map[string]*StoreStatus + mtx sync.RWMutex + storesStatusesMtx sync.RWMutex + stores map[string]*storeRef + storeNodeConnections prometheus.Gauge + externalLabelStores map[string]int + storeStatuses map[string]*StoreStatus + unhealthyStoreTimeout time.Duration } type storeSetNodeCollector struct { @@ -118,6 +119,7 @@ func NewStoreSet( reg *prometheus.Registry, storeSpecs func() []StoreSpec, dialOpts []grpc.DialOption, + unhealthyStoreTimeout time.Duration, ) *StoreSet { storeNodeConnections := prometheus.NewGauge(prometheus.GaugeOpts{ Name: "thanos_store_nodes_grpc_connections", @@ -135,14 +137,15 @@ func NewStoreSet( } ss := &StoreSet{ - logger: log.With(logger, "component", "storeset"), - storeSpecs: storeSpecs, - dialOpts: dialOpts, - storeNodeConnections: storeNodeConnections, - gRPCInfoCallTimeout: 10 * time.Second, - externalLabelStores: map[string]int{}, - stores: make(map[string]*storeRef), - storeStatuses: make(map[string]*StoreStatus), + logger: log.With(logger, "component", "storeset"), + storeSpecs: storeSpecs, + dialOpts: dialOpts, + storeNodeConnections: storeNodeConnections, + gRPCInfoCallTimeout: 10 * time.Second, + externalLabelStores: map[string]int{}, + stores: make(map[string]*storeRef), + storeStatuses: make(map[string]*StoreStatus), + unhealthyStoreTimeout: unhealthyStoreTimeout, } storeNodeCollector := &storeSetNodeCollector{externalLabelOccurrences: ss.externalLabelOccurrences} @@ -255,6 +258,7 @@ func (s *StoreSet) Update(ctx context.Context) { } s.externalLabelStores = externalLabelStores s.storeNodeConnections.Set(float64(len(s.stores))) + s.cleanUpStoreStatuses() } func (s *StoreSet) getHealthyStores(ctx context.Context) map[string]*storeRef { @@ -345,16 +349,23 @@ func (s *StoreSet) updateStoreStatus(store *storeRef, err error) { s.storesStatusesMtx.Lock() defer s.storesStatusesMtx.Unlock() - now := time.Now() - s.storeStatuses[store.addr] = &StoreStatus{ - Name: store.addr, - LastError: err, - LastCheck: now, - Labels: store.labels, - StoreType: store.storeType, - MinTime: store.minTime, - MaxTime: store.maxTime, + status := StoreStatus{Name: store.addr} + prev, ok := s.storeStatuses[store.addr] + if ok { + status = *prev + } + + status.LastError = err + status.LastCheck = time.Now() + + if err == nil { + status.Labels = store.labels + status.StoreType = store.storeType + status.MinTime = store.minTime + status.MaxTime = store.maxTime } + + s.storeStatuses[store.addr] = &status } func (s *StoreSet) GetStoreStatus() []StoreStatus { @@ -401,3 +412,17 @@ func (s *StoreSet) Close() { st.close() } } + +func (s *StoreSet) cleanUpStoreStatuses() 
{
+	s.storesStatusesMtx.Lock()
+	defer s.storesStatusesMtx.Unlock()
+
+	now := time.Now()
+	for addr, status := range s.storeStatuses {
+		if _, ok := s.stores[addr]; !ok {
+			if now.Sub(status.LastCheck) >= s.unhealthyStoreTimeout {
+				delete(s.storeStatuses, addr)
+			}
+		}
+	}
+}
diff --git a/pkg/query/storeset_test.go b/pkg/query/storeset_test.go
index 1e2058d52f..bced0d2424 100644
--- a/pkg/query/storeset_test.go
+++ b/pkg/query/storeset_test.go
@@ -142,7 +142,7 @@ func TestStoreSet_AllAvailable_ThenDown(t *testing.T) {
 	// Testing if duplicates can cause weird results.
 	initialStoreAddr = append(initialStoreAddr, initialStoreAddr[0])
 
-	storeSet := NewStoreSet(nil, nil, specsFromAddrFunc(initialStoreAddr), testGRPCOpts)
+	storeSet := NewStoreSet(nil, nil, specsFromAddrFunc(initialStoreAddr), testGRPCOpts, time.Minute)
 	storeSet.gRPCInfoCallTimeout = 2 * time.Second
 	defer storeSet.Close()
 
@@ -185,7 +185,7 @@ func TestStoreSet_StaticStores_OneAvailable(t *testing.T) {
 	initialStoreAddr := st.StoreAddresses()
 	st.CloseOne(initialStoreAddr[0])
 
-	storeSet := NewStoreSet(nil, nil, specsFromAddrFunc(initialStoreAddr), testGRPCOpts)
+	storeSet := NewStoreSet(nil, nil, specsFromAddrFunc(initialStoreAddr), testGRPCOpts, time.Minute)
 	storeSet.gRPCInfoCallTimeout = 2 * time.Second
 	defer storeSet.Close()
 
@@ -215,7 +215,7 @@ func TestStoreSet_StaticStores_NoneAvailable(t *testing.T) {
 	st.CloseOne(initialStoreAddr[0])
 	st.CloseOne(initialStoreAddr[1])
 
-	storeSet := NewStoreSet(nil, nil, specsFromAddrFunc(initialStoreAddr), testGRPCOpts)
+	storeSet := NewStoreSet(nil, nil, specsFromAddrFunc(initialStoreAddr), testGRPCOpts, time.Minute)
 	storeSet.gRPCInfoCallTimeout = 2 * time.Second
 
 	// Should not matter how many of these we run.
@@ -259,7 +259,7 @@ func TestStoreSet_AllAvailable_BlockExtLsetDuplicates(t *testing.T) {
 
 	initialStoreAddr := st.StoreAddresses()
 
-	storeSet := NewStoreSet(nil, nil, specsFromAddrFunc(initialStoreAddr), testGRPCOpts)
+	storeSet := NewStoreSet(nil, nil, specsFromAddrFunc(initialStoreAddr), testGRPCOpts, time.Minute)
 	storeSet.gRPCInfoCallTimeout = 2 * time.Second
 	defer storeSet.Close()
 
diff --git a/pkg/runutil/runutil.go b/pkg/runutil/runutil.go
index d6dbfe7e6a..10e58ea4e0 100644
--- a/pkg/runutil/runutil.go
+++ b/pkg/runutil/runutil.go
@@ -33,7 +33,7 @@
 // For capturing error, use CloseWithErrCapture:
 //
 // 	var err error
-// 	defer runutil.CloseWithErrCapture(logger, &err, closer, "log format message")
+// 	defer runutil.CloseWithErrCapture(&err, closer, "log format message")
 //
 // 	// ...
 //
@@ -49,6 +49,7 @@ import (
 	"github.com/go-kit/kit/log"
 	"github.com/go-kit/kit/log/level"
 	"github.com/pkg/errors"
+	"github.com/prometheus/tsdb"
 )
 
 // Repeat executes f every interval seconds until stopc is closed.
@@ -107,26 +108,13 @@ func CloseWithLogOnErr(logger log.Logger, closer io.Closer, format string, a ...
 		level.Warn(logger).Log("msg", "detected close error", "err", errors.Wrap(err, fmt.Sprintf(format, a...)))
 }
 
-// CloseWithErrCapture runs function and on error tries to return error by argument.
-// If error is already there we assume that error has higher priority and we just log the function error.
-func CloseWithErrCapture(logger log.Logger, err *error, closer io.Closer, format string, a ...interface{}) {
-	closeErr := closer.Close()
-	if closeErr == nil {
-		return
-	}
-
-	if *err == nil {
-		err = &closeErr
-		return
-	}
+// CloseWithErrCapture closes the closer and, if Close returns an error, merges it into the error
+// pointed to by err (which usually is the caller's named return error).
+func CloseWithErrCapture(err *error, closer io.Closer, format string, a ...interface{}) {
+	merr := tsdb.MultiError{}
 
-	// There is already an error, let's log this one.
-	if logger == nil {
-		logger = log.NewLogfmtLogger(os.Stderr)
-	}
+	merr.Add(*err)
+	merr.Add(errors.Wrapf(closer.Close(), format, a...))
 
-	level.Warn(logger).Log(
-		"msg", "detected best effort close error that was preempted from the more important one",
-		"err", errors.Wrap(closeErr, fmt.Sprintf(format, a...)),
-	)
+	*err = merr.Err()
 }
diff --git a/pkg/runutil/runutil_test.go b/pkg/runutil/runutil_test.go
new file mode 100644
index 0000000000..3ed80d17bd
--- /dev/null
+++ b/pkg/runutil/runutil_test.go
@@ -0,0 +1,65 @@
+package runutil
+
+import (
+	"io"
+	"testing"
+
+	"github.com/pkg/errors"
+)
+
+type testCloser struct {
+	err error
+}
+
+func (c testCloser) Close() error {
+	return c.err
+}
+
+func TestCloseWithErrCapture(t *testing.T) {
+	for _, tcase := range []struct {
+		err    error
+		closer io.Closer
+
+		expectedErrStr string
+	}{
+		{
+			err:            nil,
+			closer:         testCloser{err: nil},
+			expectedErrStr: "",
+		},
+		{
+			err:            errors.New("test"),
+			closer:         testCloser{err: nil},
+			expectedErrStr: "test",
+		},
+		{
+			err:            nil,
+			closer:         testCloser{err: errors.New("test")},
+			expectedErrStr: "close: test",
+		},
+		{
+			err:            errors.New("test"),
+			closer:         testCloser{err: errors.New("test")},
+			expectedErrStr: "2 errors: test; close: test",
+		},
+	} {
+		if ok := t.Run("", func(t *testing.T) {
+			ret := tcase.err
+			CloseWithErrCapture(&ret, tcase.closer, "close")
+
+			if tcase.expectedErrStr == "" {
+				if ret != nil {
+					t.Error("Expected error to be nil")
+				}
+			} else {
+				if ret == nil {
+					t.Error("Expected error to be not nil")
+					return
+				}
+
+				if tcase.expectedErrStr != ret.Error() {
+					t.Errorf("%s != %s", tcase.expectedErrStr, ret.Error())
+				}
+			}
+		}); !ok {
+			return
+		}
+	}
+}
\ No newline at end of file
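The rewritten CloseWithErrCapture above folds the Close error into the error the caller is about to return, via tsdb.MultiError. A minimal usage sketch under the new signature (the writeAll helper and file path are illustrative, not part of this change):

package main

import (
	"os"

	"github.com/improbable-eng/thanos/pkg/runutil"
)

// writeAll returns a single error that carries both a failed Write and a failed
// Close, matching the "2 errors: ...; close: ..." format asserted in the test above.
func writeAll(path string, b []byte) (err error) {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	// Merges any Close error into the named return value err.
	defer runutil.CloseWithErrCapture(&err, f, "close %s", path)

	_, err = f.Write(b)
	return err
}

func main() {
	if err := writeAll("/tmp/runutil-example", []byte("hello")); err != nil {
		panic(err)
	}
}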
diff --git a/pkg/shipper/shipper.go b/pkg/shipper/shipper.go
index c6f526f593..b4758e1749 100644
--- a/pkg/shipper/shipper.go
+++ b/pkg/shipper/shipper.go
@@ -7,24 +7,20 @@ import (
 	"encoding/json"
 	"io/ioutil"
 	"math"
-	"net/url"
 	"os"
 	"path"
 	"path/filepath"
 	"sort"
-	"time"
 
 	"github.com/go-kit/kit/log"
 	"github.com/go-kit/kit/log/level"
 	"github.com/improbable-eng/thanos/pkg/block"
 	"github.com/improbable-eng/thanos/pkg/block/metadata"
 	"github.com/improbable-eng/thanos/pkg/objstore"
-	"github.com/improbable-eng/thanos/pkg/promclient"
 	"github.com/improbable-eng/thanos/pkg/runutil"
 	"github.com/oklog/ulid"
 	"github.com/pkg/errors"
 	"github.com/prometheus/client_golang/prometheus"
-	"github.com/prometheus/common/model"
 	"github.com/prometheus/tsdb"
 	"github.com/prometheus/tsdb/fileutil"
 	"github.com/prometheus/tsdb/labels"
@@ -120,15 +116,13 @@ func New(
 // to remote if necessary, including compacted blocks which are already in filesystem.
 // It attaches the Thanos metadata section in each meta JSON file.
 func NewWithCompacted(
-	ctx context.Context,
 	logger log.Logger,
 	r prometheus.Registerer,
 	dir string,
 	bucket objstore.Bucket,
 	lbls func() labels.Labels,
 	source metadata.SourceType,
-	prometheusURL *url.URL,
-) (*Shipper, error) {
+) *Shipper {
 	if logger == nil {
 		logger = log.NewNopLogger()
 	}
@@ -136,25 +130,6 @@ func NewWithCompacted(
 		lbls = func() labels.Labels { return nil }
 	}
 
-	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
-	defer cancel()
-
-	var flags promclient.Flags
-	if err := runutil.Retry(1*time.Second, ctx.Done(), func() (err error) {
-		flags, err = promclient.ConfiguredFlags(ctx, logger, prometheusURL)
-		if err != nil {
-			return errors.Wrap(err, "configured flags; failed to check if compaction is disabled")
-		}
-		return nil
-	}); err != nil {
-		return nil, err
-	}
-
-	if flags.TSDBMinTime != model.Duration(2*time.Hour) || flags.TSDBMaxTime != model.Duration(2*time.Hour) {
-		return nil, errors.Errorf("Found that TSDB Max time is %s and Min time is %s. To use shipper with upload compacted option, "+
-			"compaction needs to be disabled (storage.tsdb.min-block-duration = storage.tsdb.max-block-duration = 2h", flags.TSDBMinTime, flags.TSDBMaxTime)
-	}
-
 	return &Shipper{
 		logger:          logger,
 		dir:             dir,
@@ -163,7 +138,7 @@ func NewWithCompacted(
 		metrics:         newMetrics(r, true),
 		source:          source,
 		uploadCompacted: true,
-	}, nil
+	}
 }
 
 // Timestamps returns the minimum timestamp for which data is available and the highest timestamp
diff --git a/pkg/shipper/shipper_e2e_test.go b/pkg/shipper/shipper_e2e_test.go
index 8ddde0e771..62c6b65d02 100644
--- a/pkg/shipper/shipper_e2e_test.go
+++ b/pkg/shipper/shipper_e2e_test.go
@@ -6,7 +6,6 @@ import (
 	"encoding/json"
 	"io/ioutil"
 	"math/rand"
-	"net/url"
 	"os"
 	"path"
 	"path/filepath"
@@ -191,11 +190,7 @@ func TestShipper_SyncBlocksWithMigrating_e2e(t *testing.T) {
 	defer upcancel()
 	testutil.Ok(t, p.WaitPrometheusUp(upctx))
 
-	addr, err := url.Parse(p.Addr())
-	testutil.Ok(t, err)
-
-	shipper, err := NewWithCompacted(ctx, log.NewLogfmtLogger(os.Stderr), nil, dir, bkt, func() labels.Labels { return extLset }, metadata.TestSource, addr)
-	testutil.NotOk(t, err) // Compaction not disabled!
+	shipper := NewWithCompacted(log.NewLogfmtLogger(os.Stderr), nil, dir, bkt, func() labels.Labels { return extLset }, metadata.TestSource)
 
 	p.DisableCompaction()
 	testutil.Ok(t, p.Restart())
@@ -204,11 +199,7 @@ func TestShipper_SyncBlocksWithMigrating_e2e(t *testing.T) {
 	defer upcancel2()
 	testutil.Ok(t, p.WaitPrometheusUp(upctx2))
 
-	addr, err = url.Parse("http://" + p.Addr())
-	testutil.Ok(t, err)
-
-	shipper, err = NewWithCompacted(ctx, log.NewLogfmtLogger(os.Stderr), nil, dir, bkt, func() labels.Labels { return extLset }, metadata.TestSource, addr)
-	testutil.Ok(t, err)
+	shipper = NewWithCompacted(log.NewLogfmtLogger(os.Stderr), nil, dir, bkt, func() labels.Labels { return extLset }, metadata.TestSource)
 
 	// Create 10 new blocks. 9 of them (non compacted) should be actually uploaded.
 	var (
diff --git a/pkg/store/bucket.go b/pkg/store/bucket.go
index d2070229f1..3823d84225 100644
--- a/pkg/store/bucket.go
+++ b/pkg/store/bucket.go
@@ -175,6 +175,13 @@ func newBucketStoreMetrics(reg prometheus.Registerer) *bucketStoreMetrics {
 	return &m
 }
 
+type indexCache interface {
+	SetPostings(b ulid.ULID, l labels.Label, v []byte)
+	Postings(b ulid.ULID, l labels.Label) ([]byte, bool)
+	SetSeries(b ulid.ULID, id uint64, v []byte)
+	Series(b ulid.ULID, id uint64) ([]byte, bool)
+}
+
 // BucketStore implements the store API backed by a bucket. It loads all index
 // files to local disk.
type BucketStore struct { @@ -182,7 +189,7 @@ type BucketStore struct { metrics *bucketStoreMetrics bucket objstore.BucketReader dir string - indexCache *indexCache + indexCache indexCache chunkPool *pool.BytesPool // Sets of blocks that have the same labels. They are indexed by a hash over their label set. @@ -210,7 +217,7 @@ func NewBucketStore( reg prometheus.Registerer, bucket objstore.BucketReader, dir string, - indexCacheSizeBytes uint64, + indexCache indexCache, maxChunkPoolBytes uint64, maxSampleCount uint64, maxConcurrent int, @@ -225,10 +232,6 @@ func NewBucketStore( return nil, errors.Errorf("max concurrency value cannot be lower than 0 (got %v)", maxConcurrent) } - indexCache, err := newIndexCache(reg, indexCacheSizeBytes) - if err != nil { - return nil, errors.Wrap(err, "create index cache") - } chunkPool, err := pool.NewBytesPool(2e5, 50e6, 2, maxChunkPoolBytes) if err != nil { return nil, errors.Wrap(err, "create chunk pool") @@ -856,8 +859,38 @@ func chunksSize(chks []storepb.AggrChunk) (size int) { } // LabelNames implements the storepb.StoreServer interface. -func (s *BucketStore) LabelNames(context.Context, *storepb.LabelNamesRequest) (*storepb.LabelNamesResponse, error) { - return nil, status.Error(codes.Unimplemented, "not implemented") +func (s *BucketStore) LabelNames(ctx context.Context, _ *storepb.LabelNamesRequest) (*storepb.LabelNamesResponse, error) { + g, gctx := errgroup.WithContext(ctx) + + s.mtx.RLock() + + var mtx sync.Mutex + var sets [][]string + + for _, b := range s.blocks { + indexr := b.indexReader(gctx) + g.Go(func() error { + defer runutil.CloseWithLogOnErr(s.logger, indexr, "label names") + + res := indexr.LabelNames() + sort.Strings(res) + + mtx.Lock() + sets = append(sets, res) + mtx.Unlock() + + return nil + }) + } + + s.mtx.RUnlock() + + if err := g.Wait(); err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + return &storepb.LabelNamesResponse{ + Names: strutil.MergeSlices(sets...), + }, nil } // LabelValues implements the storepb.StoreServer interface. @@ -1028,15 +1061,16 @@ type bucketBlock struct { bucket objstore.BucketReader meta *metadata.Meta dir string - indexCache *indexCache + indexCache indexCache chunkPool *pool.BytesPool indexVersion int symbols map[uint32]string + symbolsV2 map[string]struct{} lvals map[string][]string postings map[labels.Label]index.Range - indexObj string + id ulid.ULID chunkObjs []string pendingReaders sync.WaitGroup @@ -1050,14 +1084,14 @@ func newBucketBlock( bkt objstore.BucketReader, id ulid.ULID, dir string, - indexCache *indexCache, + indexCache indexCache, chunkPool *pool.BytesPool, p partitioner, ) (b *bucketBlock, err error) { b = &bucketBlock{ logger: logger, bucket: bkt, - indexObj: path.Join(id.String(), block.IndexFilename), + id: id, indexCache: indexCache, chunkPool: chunkPool, dir: dir, @@ -1066,7 +1100,7 @@ func newBucketBlock( if err = b.loadMeta(ctx, id); err != nil { return nil, errors.Wrap(err, "load meta") } - if err = b.loadIndexCache(ctx); err != nil { + if err = b.loadIndexCacheFile(ctx); err != nil { return nil, errors.Wrap(err, "load index cache") } // Get object handles for all chunk files. 
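The BucketStore.LabelNames implementation above collects one sorted slice of names per block and relies on strutil.MergeSlices to merge them. A standalone sketch of that merge-and-deduplicate step (illustrative only, not the strutil implementation):

package main

import (
	"fmt"
	"sort"
)

// mergeSorted merges already-sorted string slices into one sorted slice without
// duplicates, mirroring what the LabelNames response construction needs.
func mergeSorted(sets ...[]string) []string {
	var all []string
	for _, s := range sets {
		all = append(all, s...)
	}
	sort.Strings(all)

	out := all[:0]
	for i, v := range all {
		if i == 0 || all[i-1] != v {
			out = append(out, v)
		}
	}
	return out
}

func main() {
	fmt.Println(mergeSorted([]string{"__name__", "job"}, []string{"instance", "job"}))
	// Output: [__name__ instance job]
}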
@@ -1080,6 +1114,14 @@ func newBucketBlock( return b, nil } +func (b *bucketBlock) indexFilename() string { + return path.Join(b.id.String(), block.IndexFilename) +} + +func (b *bucketBlock) indexCacheFilename() string { + return path.Join(b.id.String(), block.IndexCacheFilename) +} + func (b *bucketBlock) loadMeta(ctx context.Context, id ulid.ULID) error { // If we haven't seen the block before download the meta.json file. if _, err := os.Stat(b.dir); os.IsNotExist(err) { @@ -1102,22 +1144,31 @@ func (b *bucketBlock) loadMeta(ctx context.Context, id ulid.ULID) error { return nil } -func (b *bucketBlock) loadIndexCache(ctx context.Context) (err error) { +func (b *bucketBlock) loadIndexCacheFile(ctx context.Context) (err error) { cachefn := filepath.Join(b.dir, block.IndexCacheFilename) - - b.indexVersion, b.symbols, b.lvals, b.postings, err = block.ReadIndexCache(b.logger, cachefn) - if err == nil { + if err = b.loadIndexCacheFileFromFile(ctx, cachefn); err == nil { return nil } if !os.IsNotExist(errors.Cause(err)) { return errors.Wrap(err, "read index cache") } - // No cache exists is on disk yet, build it from the downloaded index and retry. + + // Try to download index cache file from object store. + if err = objstore.DownloadFile(ctx, b.logger, b.bucket, b.indexCacheFilename(), cachefn); err == nil { + return b.loadIndexCacheFileFromFile(ctx, cachefn) + } + + if !b.bucket.IsObjNotFoundErr(errors.Cause(err)) { + return errors.Wrap(err, "download index cache file") + } + + // No cache exists on disk yet, build it from the downloaded index and retry. fn := filepath.Join(b.dir, block.IndexFilename) - if err := objstore.DownloadFile(ctx, b.logger, b.bucket, b.indexObj, fn); err != nil { + if err := objstore.DownloadFile(ctx, b.logger, b.bucket, b.indexFilename(), fn); err != nil { return errors.Wrap(err, "download index file") } + defer func() { if rerr := os.Remove(fn); rerr != nil { level.Error(b.logger).Log("msg", "failed to remove temp index file", "path", fn, "err", rerr) @@ -1128,15 +1179,16 @@ func (b *bucketBlock) loadIndexCache(ctx context.Context) (err error) { return errors.Wrap(err, "write index cache") } - b.indexVersion, b.symbols, b.lvals, b.postings, err = block.ReadIndexCache(b.logger, cachefn) - if err != nil { - return errors.Wrap(err, "read index cache") - } - return nil + return errors.Wrap(b.loadIndexCacheFileFromFile(ctx, cachefn), "read index cache") +} + +func (b *bucketBlock) loadIndexCacheFileFromFile(ctx context.Context, cache string) (err error) { + b.indexVersion, b.symbols, b.lvals, b.postings, err = block.ReadIndexCache(b.logger, cache) + return err } func (b *bucketBlock) readIndexRange(ctx context.Context, off, length int64) ([]byte, error) { - r, err := b.bucket.GetRange(ctx, b.indexObj, off, length) + r, err := b.bucket.GetRange(ctx, b.indexFilename(), off, length) if err != nil { return nil, errors.Wrap(err, "get range reader") } @@ -1192,13 +1244,13 @@ type bucketIndexReader struct { block *bucketBlock dec *index.Decoder stats *queryStats - cache *indexCache + cache indexCache mtx sync.Mutex loadedSeries map[uint64][]byte } -func newBucketIndexReader(ctx context.Context, logger log.Logger, block *bucketBlock, cache *indexCache) *bucketIndexReader { +func newBucketIndexReader(ctx context.Context, logger log.Logger, block *bucketBlock, cache indexCache) *bucketIndexReader { r := &bucketIndexReader{ logger: logger, ctx: ctx, @@ -1366,7 +1418,7 @@ func (r *bucketIndexReader) fetchPostings(groups []*postingGroup) error { for i, g := range groups { for j, key 
:= range g.keys { // Get postings for the given key from cache first. - if b, ok := r.cache.postings(r.block.meta.ULID, key); ok { + if b, ok := r.cache.Postings(r.block.meta.ULID, key); ok { r.stats.postingsTouched++ r.stats.postingsTouchedSizeSum += len(b) @@ -1438,7 +1490,7 @@ func (r *bucketIndexReader) fetchPostings(groups []*postingGroup) error { // Return postings and fill LRU cache. groups[p.groupID].Fill(p.keyID, fetchedPostings) - r.cache.setPostings(r.block.meta.ULID, groups[p.groupID].keys[p.keyID], c) + r.cache.SetPostings(r.block.meta.ULID, groups[p.groupID].keys[p.keyID], c) // If we just fetched it we still have to update the stats for touched postings. r.stats.postingsTouched++ @@ -1461,7 +1513,7 @@ func (r *bucketIndexReader) PreloadSeries(ids []uint64) error { var newIDs []uint64 for _, id := range ids { - if b, ok := r.cache.series(r.block.meta.ULID, id); ok { + if b, ok := r.cache.Series(r.block.meta.ULID, id); ok { r.loadedSeries[id] = b continue } @@ -1518,7 +1570,7 @@ func (r *bucketIndexReader) loadSeries(ctx context.Context, ids []uint64, start, } c = c[n : n+int(l)] r.loadedSeries[id] = c - r.cache.setSeries(r.block.meta.ULID, id, c) + r.cache.SetSeries(r.block.meta.ULID, id, c) } return nil } @@ -1597,6 +1649,15 @@ func (r *bucketIndexReader) LabelValues(name string) []string { return res } +// LabelNames returns a list of label names. +func (r *bucketIndexReader) LabelNames() []string { + res := make([]string, 0, len(r.block.lvals)) + for ln, _ := range r.block.lvals { + res = append(res, ln) + } + return res +} + // Close released the underlying resources of the reader. func (r *bucketIndexReader) Close() error { r.block.pendingReaders.Done() diff --git a/pkg/store/bucket_e2e_test.go b/pkg/store/bucket_e2e_test.go index 997767d055..c0fc42b97c 100644 --- a/pkg/store/bucket_e2e_test.go +++ b/pkg/store/bucket_e2e_test.go @@ -9,12 +9,15 @@ import ( "testing" "time" + "github.com/oklog/ulid" + "github.com/go-kit/kit/log" "github.com/improbable-eng/thanos/pkg/block" "github.com/improbable-eng/thanos/pkg/block/metadata" "github.com/improbable-eng/thanos/pkg/objstore" "github.com/improbable-eng/thanos/pkg/objstore/objtesting" "github.com/improbable-eng/thanos/pkg/runutil" + storecache "github.com/improbable-eng/thanos/pkg/store/cache" "github.com/improbable-eng/thanos/pkg/store/storepb" "github.com/improbable-eng/thanos/pkg/testutil" "github.com/pkg/errors" @@ -22,12 +25,46 @@ import ( "github.com/prometheus/tsdb/labels" ) +type noopCache struct{} + +func (noopCache) SetPostings(b ulid.ULID, l labels.Label, v []byte) {} +func (noopCache) Postings(b ulid.ULID, l labels.Label) ([]byte, bool) { return nil, false } +func (noopCache) SetSeries(b ulid.ULID, id uint64, v []byte) {} +func (noopCache) Series(b ulid.ULID, id uint64) ([]byte, bool) { return nil, false } + +type swappableCache struct { + ptr indexCache +} + +func (c *swappableCache) SwapWith(ptr2 indexCache) { + c.ptr = ptr2 +} + +func (c *swappableCache) SetPostings(b ulid.ULID, l labels.Label, v []byte) { + c.ptr.SetPostings(b, l, v) +} + +func (c *swappableCache) Postings(b ulid.ULID, l labels.Label) ([]byte, bool) { + return c.ptr.Postings(b, l) +} + +func (c *swappableCache) SetSeries(b ulid.ULID, id uint64, v []byte) { + c.ptr.SetSeries(b, id, v) +} + +func (c *swappableCache) Series(b ulid.ULID, id uint64) ([]byte, bool) { + return c.ptr.Series(b, id) +} + type storeSuite struct { cancel context.CancelFunc wg sync.WaitGroup store *BucketStore minTime, maxTime int64 + cache *swappableCache + + logger 
log.Logger } func (s *storeSuite) Close() { @@ -52,7 +89,11 @@ func prepareStoreWithTestBlocks(t testing.TB, dir string, bkt objstore.Bucket, m now := start ctx, cancel := context.WithCancel(context.Background()) - s := &storeSuite{cancel: cancel} + s := &storeSuite{ + cancel: cancel, + logger: log.NewLogfmtLogger(os.Stderr), + cache: &swappableCache{}, + } blocks := 0 for i := 0; i < 3; i++ { mint := timestamp.FromTime(now) @@ -66,9 +107,9 @@ func prepareStoreWithTestBlocks(t testing.TB, dir string, bkt objstore.Bucket, m // Create two blocks per time slot. Only add 10 samples each so only one chunk // gets created each. This way we can easily verify we got 10 chunks per series below. - id1, err := testutil.CreateBlock(dir, series[:4], 10, mint, maxt, extLset, 0) + id1, err := testutil.CreateBlock(ctx, dir, series[:4], 10, mint, maxt, extLset, 0) testutil.Ok(t, err) - id2, err := testutil.CreateBlock(dir, series[4:], 10, mint, maxt, extLset, 0) + id2, err := testutil.CreateBlock(ctx, dir, series[4:], 10, mint, maxt, extLset, 0) testutil.Ok(t, err) dir1, dir2 := filepath.Join(dir, id1.String()), filepath.Join(dir, id2.String()) @@ -77,17 +118,17 @@ func prepareStoreWithTestBlocks(t testing.TB, dir string, bkt objstore.Bucket, m meta, err := metadata.Read(dir2) testutil.Ok(t, err) meta.Thanos.Labels = map[string]string{"ext2": "value2"} - testutil.Ok(t, metadata.Write(log.NewNopLogger(), dir2, meta)) + testutil.Ok(t, metadata.Write(s.logger, dir2, meta)) - testutil.Ok(t, block.Upload(ctx, log.NewNopLogger(), bkt, dir1)) - testutil.Ok(t, block.Upload(ctx, log.NewNopLogger(), bkt, dir2)) + testutil.Ok(t, block.Upload(ctx, s.logger, bkt, dir1)) + testutil.Ok(t, block.Upload(ctx, s.logger, bkt, dir2)) blocks += 2 testutil.Ok(t, os.RemoveAll(dir1)) testutil.Ok(t, os.RemoveAll(dir2)) } - store, err := NewBucketStore(log.NewLogfmtLogger(os.Stderr), nil, bkt, dir, 100, 0, maxSampleCount, 20, false, 20) + store, err := NewBucketStore(s.logger, nil, bkt, dir, s.cache, 0, maxSampleCount, 20, false, 20) testutil.Ok(t, err) s.store = store @@ -309,10 +350,6 @@ func testBucketStore_e2e(t testing.TB, ctx context.Context, s *storeSuite) { } { t.Log("Run ", i) - // Always clean cache before each test. 
- s.store.indexCache, err = newIndexCache(nil, 100) - testutil.Ok(t, err) - srv := newStoreSeriesServer(ctx) testutil.Ok(t, s.store.Series(tcase.req, srv)) @@ -337,6 +374,26 @@ func TestBucketStore_e2e(t *testing.T) { s := prepareStoreWithTestBlocks(t, dir, bkt, false, 0) defer s.Close() + t.Log("Test with no index cache") + s.cache.SwapWith(noopCache{}) + testBucketStore_e2e(t, ctx, s) + + t.Log("Test with large, sufficient index cache") + indexCache, err := storecache.NewIndexCache(s.logger, nil, storecache.Opts{ + MaxItemSizeBytes: 1e5, + MaxSizeBytes: 2e5, + }) + testutil.Ok(t, err) + s.cache.SwapWith(indexCache) + testBucketStore_e2e(t, ctx, s) + + t.Log("Test with small index cache") + indexCache2, err := storecache.NewIndexCache(s.logger, nil, storecache.Opts{ + MaxItemSizeBytes: 50, + MaxSizeBytes: 100, + }) + testutil.Ok(t, err) + s.cache.SwapWith(indexCache2) testBucketStore_e2e(t, ctx, s) }) } @@ -366,6 +423,13 @@ func TestBucketStore_ManyParts_e2e(t *testing.T) { s := prepareStoreWithTestBlocks(t, dir, bkt, true, 0) defer s.Close() + indexCache, err := storecache.NewIndexCache(s.logger, nil, storecache.Opts{ + MaxItemSizeBytes: 1e5, + MaxSizeBytes: 2e5, + }) + testutil.Ok(t, err) + s.cache.SwapWith(indexCache) + testBucketStore_e2e(t, ctx, s) }) } diff --git a/pkg/store/bucket_test.go b/pkg/store/bucket_test.go index 18f953c298..4e9e581014 100644 --- a/pkg/store/bucket_test.go +++ b/pkg/store/bucket_test.go @@ -283,7 +283,7 @@ func TestBucketStore_Info(t *testing.T) { dir, err := ioutil.TempDir("", "prometheus-test") testutil.Ok(t, err) - bucketStore, err := NewBucketStore(nil, nil, nil, dir, 2e5, 2e5, 0, 0, false, 20) + bucketStore, err := NewBucketStore(nil, nil, nil, dir, noopCache{}, 2e5, 0, 0, false, 20) testutil.Ok(t, err) resp, err := bucketStore.Info(ctx, &storepb.InfoRequest{}) diff --git a/pkg/store/cache.go b/pkg/store/cache.go deleted file mode 100644 index 58e720790b..0000000000 --- a/pkg/store/cache.go +++ /dev/null @@ -1,206 +0,0 @@ -package store - -import ( - "sync" - - lru "github.com/hashicorp/golang-lru/simplelru" - "github.com/oklog/ulid" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/tsdb/labels" -) - -const ( - cacheTypePostings = "postings" - cacheTypeSeries = "series" -) - -type cacheItem struct { - block ulid.ULID - key interface{} -} - -func (c cacheItem) keyType() string { - switch c.key.(type) { - case cacheKeyPostings: - return cacheTypePostings - case cacheKeySeries: - return cacheTypeSeries - } - return "" -} - -type cacheKeyPostings labels.Label -type cacheKeySeries uint64 - -type indexCache struct { - mtx sync.Mutex - lru *lru.LRU - maxSize uint64 - curSize uint64 - - requests *prometheus.CounterVec - hits *prometheus.CounterVec - added *prometheus.CounterVec - current *prometheus.GaugeVec - currentSize *prometheus.GaugeVec - overflow *prometheus.CounterVec -} - -// newIndexCache creates a new LRU cache for index entries and ensures the total cache -// size approximately does not exceed maxBytes. 
-func newIndexCache(reg prometheus.Registerer, maxBytes uint64) (*indexCache, error) { - c := &indexCache{ - maxSize: maxBytes, - } - evicted := prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "thanos_store_index_cache_items_evicted_total", - Help: "Total number of items that were evicted from the index cache.", - }, []string{"item_type"}) - - c.added = prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "thanos_store_index_cache_items_added_total", - Help: "Total number of items that were added to the index cache.", - }, []string{"item_type"}) - - c.requests = prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "thanos_store_index_cache_requests_total", - Help: "Total number of requests to the cache.", - }, []string{"item_type"}) - - c.overflow = prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "thanos_store_index_cache_items_overflowed_total", - Help: "Total number of items that could not be added to the cache due to being too big.", - }, []string{"item_type"}) - - c.hits = prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "thanos_store_index_cache_hits_total", - Help: "Total number of requests to the cache that were a hit.", - }, []string{"item_type"}) - - c.current = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "thanos_store_index_cache_items", - Help: "Current number of items in the index cache.", - }, []string{"item_type"}) - - c.currentSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "thanos_store_index_cache_items_size_bytes", - Help: "Current byte size of items in the index cache.", - }, []string{"item_type"}) - - // Initialize eviction metric with 0. - evicted.WithLabelValues(cacheTypePostings) - evicted.WithLabelValues(cacheTypeSeries) - - // Initialize LRU cache with a high size limit since we will manage evictions ourselves - // based on stored size. - onEvict := func(key, val interface{}) { - k := key.(cacheItem).keyType() - v := val.([]byte) - - evicted.WithLabelValues(k).Inc() - c.current.WithLabelValues(k).Dec() - c.currentSize.WithLabelValues(k).Sub(float64(len(v))) - - c.curSize -= uint64(len(v)) - } - l, err := lru.NewLRU(1e12, onEvict) - if err != nil { - return nil, err - } - c.lru = l - - if reg != nil { - reg.MustRegister(prometheus.NewGaugeFunc(prometheus.GaugeOpts{ - Name: "thanos_store_index_cache_max_size_bytes", - Help: "Maximum number of bytes to be held in the index cache.", - }, func() float64 { - return float64(maxBytes) - })) - reg.MustRegister(c.requests, c.hits, c.added, evicted, c.current, c.currentSize) - } - return c, nil -} - -// ensureFits tries to make sure that the passed slice will fit into the LRU cache. -// Returns true if it will fit. -func (c *indexCache) ensureFits(b []byte) bool { - if uint64(len(b)) > c.maxSize { - return false - } - for c.curSize > c.maxSize-uint64(len(b)) { - c.lru.RemoveOldest() - } - return true -} - -func (c *indexCache) setPostings(b ulid.ULID, l labels.Label, v []byte) { - c.added.WithLabelValues(cacheTypePostings).Inc() - - c.mtx.Lock() - defer c.mtx.Unlock() - - if !c.ensureFits(v) { - c.overflow.WithLabelValues(cacheTypePostings).Inc() - return - } - - // The caller may be passing in a sub-slice of a huge array. Copy the data - // to ensure we don't waste huge amounts of space for something small. 
- cv := make([]byte, len(v)) - copy(cv, v) - c.lru.Add(cacheItem{b, cacheKeyPostings(l)}, cv) - - c.currentSize.WithLabelValues(cacheTypePostings).Add(float64(len(v))) - c.current.WithLabelValues(cacheTypePostings).Inc() - c.curSize += uint64(len(v)) -} - -func (c *indexCache) postings(b ulid.ULID, l labels.Label) ([]byte, bool) { - c.requests.WithLabelValues(cacheTypePostings).Inc() - - c.mtx.Lock() - defer c.mtx.Unlock() - - v, ok := c.lru.Get(cacheItem{b, cacheKeyPostings(l)}) - if !ok { - return nil, false - } - c.hits.WithLabelValues(cacheTypePostings).Inc() - return v.([]byte), true -} - -func (c *indexCache) setSeries(b ulid.ULID, id uint64, v []byte) { - c.added.WithLabelValues(cacheTypeSeries).Inc() - - c.mtx.Lock() - defer c.mtx.Unlock() - - if !c.ensureFits(v) { - c.overflow.WithLabelValues(cacheTypeSeries).Inc() - return - } - - // The caller may be passing in a sub-slice of a huge array. Copy the data - // to ensure we don't waste huge amounts of space for something small. - cv := make([]byte, len(v)) - copy(cv, v) - c.lru.Add(cacheItem{b, cacheKeySeries(id)}, cv) - - c.currentSize.WithLabelValues(cacheTypeSeries).Add(float64(len(v))) - c.current.WithLabelValues(cacheTypeSeries).Inc() - c.curSize += uint64(len(v)) -} - -func (c *indexCache) series(b ulid.ULID, id uint64) ([]byte, bool) { - c.requests.WithLabelValues(cacheTypeSeries).Inc() - - c.mtx.Lock() - defer c.mtx.Unlock() - - v, ok := c.lru.Get(cacheItem{b, cacheKeySeries(id)}) - if !ok { - return nil, false - } - c.hits.WithLabelValues(cacheTypeSeries).Inc() - return v.([]byte), true -} diff --git a/pkg/store/cache/cache.go b/pkg/store/cache/cache.go new file mode 100644 index 0000000000..cd75332b87 --- /dev/null +++ b/pkg/store/cache/cache.go @@ -0,0 +1,322 @@ +package storecache + +import ( + "math" + "sync" + + "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" + lru "github.com/hashicorp/golang-lru/simplelru" + "github.com/oklog/ulid" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/tsdb/labels" +) + +const ( + cacheTypePostings string = "Postings" + cacheTypeSeries string = "Series" + + sliceHeaderSize = 16 +) + +type cacheKey struct { + block ulid.ULID + key interface{} +} + +func (c cacheKey) keyType() string { + switch c.key.(type) { + case cacheKeyPostings: + return cacheTypePostings + case cacheKeySeries: + return cacheTypeSeries + } + return "" +} + +func (c cacheKey) size() uint64 { + switch k := c.key.(type) { + case cacheKeyPostings: + // ULID + 2 slice headers + number of chars in value and name. + return 16 + 2*sliceHeaderSize + uint64(len(k.Value)+len(k.Name)) + case cacheKeySeries: + return 16 + 8 // ULID + uint64 + } + return 0 +} + +type cacheKeyPostings labels.Label +type cacheKeySeries uint64 + +type IndexCache struct { + mtx sync.Mutex + + logger log.Logger + lru *lru.LRU + maxSizeBytes uint64 + maxItemSizeBytes uint64 + + curSize uint64 + + evicted *prometheus.CounterVec + requests *prometheus.CounterVec + hits *prometheus.CounterVec + added *prometheus.CounterVec + current *prometheus.GaugeVec + currentSize *prometheus.GaugeVec + totalCurrentSize *prometheus.GaugeVec + overflow *prometheus.CounterVec +} + +type Opts struct { + // MaxSizeBytes represents overall maximum number of bytes cache can contain. + MaxSizeBytes uint64 + // MaxItemSizeBytes represents maximum size of single item. 
+	MaxItemSizeBytes uint64
+}
+
+// NewIndexCache creates a new thread-safe LRU cache for index entries and ensures the total cache
+// size approximately does not exceed Opts.MaxSizeBytes.
+func NewIndexCache(logger log.Logger, reg prometheus.Registerer, opts Opts) (*IndexCache, error) {
+	if opts.MaxItemSizeBytes > opts.MaxSizeBytes {
+		return nil, errors.Errorf("max item size (%v) cannot be bigger than overall cache size (%v)", opts.MaxItemSizeBytes, opts.MaxSizeBytes)
+	}
+
+	c := &IndexCache{
+		logger:           logger,
+		maxSizeBytes:     opts.MaxSizeBytes,
+		maxItemSizeBytes: opts.MaxItemSizeBytes,
+	}
+
+	c.evicted = prometheus.NewCounterVec(prometheus.CounterOpts{
+		Name: "thanos_store_index_cache_items_evicted_total",
+		Help: "Total number of items that were evicted from the index cache.",
+	}, []string{"item_type"})
+	c.evicted.WithLabelValues(cacheTypePostings)
+	c.evicted.WithLabelValues(cacheTypeSeries)
+
+	c.added = prometheus.NewCounterVec(prometheus.CounterOpts{
+		Name: "thanos_store_index_cache_items_added_total",
+		Help: "Total number of items that were added to the index cache.",
+	}, []string{"item_type"})
+	c.added.WithLabelValues(cacheTypePostings)
+	c.added.WithLabelValues(cacheTypeSeries)
+
+	c.requests = prometheus.NewCounterVec(prometheus.CounterOpts{
+		Name: "thanos_store_index_cache_requests_total",
+		Help: "Total number of requests to the cache.",
+	}, []string{"item_type"})
+	c.requests.WithLabelValues(cacheTypePostings)
+	c.requests.WithLabelValues(cacheTypeSeries)
+
+	c.overflow = prometheus.NewCounterVec(prometheus.CounterOpts{
+		Name: "thanos_store_index_cache_items_overflowed_total",
+		Help: "Total number of items that could not be added to the cache due to being too big.",
+	}, []string{"item_type"})
+	c.overflow.WithLabelValues(cacheTypePostings)
+	c.overflow.WithLabelValues(cacheTypeSeries)
+
+	c.hits = prometheus.NewCounterVec(prometheus.CounterOpts{
+		Name: "thanos_store_index_cache_hits_total",
+		Help: "Total number of requests to the cache that were a hit.",
+	}, []string{"item_type"})
+	c.hits.WithLabelValues(cacheTypePostings)
+	c.hits.WithLabelValues(cacheTypeSeries)
+
+	c.current = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Name: "thanos_store_index_cache_items",
+		Help: "Current number of items in the index cache.",
+	}, []string{"item_type"})
+	c.current.WithLabelValues(cacheTypePostings)
+	c.current.WithLabelValues(cacheTypeSeries)
+
+	c.currentSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Name: "thanos_store_index_cache_items_size_bytes",
+		Help: "Current byte size of items in the index cache.",
+	}, []string{"item_type"})
+	c.currentSize.WithLabelValues(cacheTypePostings)
+	c.currentSize.WithLabelValues(cacheTypeSeries)
+
+	c.totalCurrentSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Name: "thanos_store_index_cache_total_size_bytes",
+		Help: "Current byte size of items (both value and key) in the index cache.",
+	}, []string{"item_type"})
+	c.totalCurrentSize.WithLabelValues(cacheTypePostings)
+	c.totalCurrentSize.WithLabelValues(cacheTypeSeries)
+
+	if reg != nil {
+		reg.MustRegister(prometheus.NewGaugeFunc(prometheus.GaugeOpts{
+			Name: "thanos_store_index_cache_max_size_bytes",
+			Help: "Maximum number of bytes to be held in the index cache.",
+		}, func() float64 {
+			return float64(c.maxSizeBytes)
+		}))
+		reg.MustRegister(prometheus.NewGaugeFunc(prometheus.GaugeOpts{
+			Name: "thanos_store_index_cache_max_item_size_bytes",
+			Help: "Maximum number of bytes for single entry to be held in the index cache.",
+		}, func() float64 {
+			return 
float64(c.maxItemSizeBytes) + })) + reg.MustRegister(c.requests, c.hits, c.added, c.evicted, c.current, c.currentSize, c.totalCurrentSize, c.overflow) + } + + // Initialize LRU cache with a high size limit since we will manage evictions ourselves + // based on stored size using `RemoveOldest` method. + l, err := lru.NewLRU(math.MaxInt64, c.onEvict) + if err != nil { + return nil, err + } + c.lru = l + + level.Info(logger).Log( + "msg", "created index cache", + "maxItemSizeBytes", c.maxItemSizeBytes, + "maxSizeBytes", c.maxSizeBytes, + "maxItems", "math.MaxInt64", + ) + return c, nil +} + +func (c *IndexCache) onEvict(key, val interface{}) { + k := key.(cacheKey).keyType() + entrySize := sliceHeaderSize + uint64(len(val.([]byte))) + + c.evicted.WithLabelValues(string(k)).Inc() + c.current.WithLabelValues(string(k)).Dec() + c.currentSize.WithLabelValues(string(k)).Sub(float64(entrySize)) + c.totalCurrentSize.WithLabelValues(string(k)).Sub(float64(entrySize + key.(cacheKey).size())) + + c.curSize -= entrySize +} + +// ensureFits tries to make sure that the passed slice will fit into the LRU cache. +// Returns true if it will fit. +func (c *IndexCache) ensureFits(size uint64, typ string) bool { + const saneMaxIterations = 500 + + if size > c.maxItemSizeBytes { + level.Debug(c.logger).Log( + "msg", "item bigger than maxItemSizeBytes. Ignoring..", + "maxItemSizeBytes", c.maxItemSizeBytes, + "maxSizeBytes", c.maxSizeBytes, + "curSize", c.curSize, + "itemSize", size, + "cacheType", typ, + ) + return false + } + + for i := 0; c.curSize+size > c.maxSizeBytes; i++ { + if i >= saneMaxIterations { + level.Error(c.logger).Log( + "msg", "After max sane iterations of LRU evictions, we still cannot allocate the item. Ignoring.", + "maxItemSizeBytes", c.maxItemSizeBytes, + "maxSizeBytes", c.maxSizeBytes, + "curSize", c.curSize, + "itemSize", size, + "cacheType", typ, + "iterations", i, + ) + return false + } + + _, _, ok := c.lru.RemoveOldest() + if !ok { + level.Error(c.logger).Log( + "msg", "LRU has nothing more to evict, but we still cannot allocate the item. Ignoring.", + "maxItemSizeBytes", c.maxItemSizeBytes, + "maxSizeBytes", c.maxSizeBytes, + "curSize", c.curSize, + "itemSize", size, + "cacheType", typ, + ) + return false + } + } + return true +} + +func (c *IndexCache) SetPostings(b ulid.ULID, l labels.Label, v []byte) { + var ( + entrySize = sliceHeaderSize + uint64(len(v)) + cacheType = cacheTypePostings + ) + + c.mtx.Lock() + defer c.mtx.Unlock() + + if !c.ensureFits(entrySize, cacheType) { + c.overflow.WithLabelValues(cacheType).Inc() + return + } + + // The caller may be passing in a sub-slice of a huge array. Copy the data + // to ensure we don't waste huge amounts of space for something small. 
+ cv := make([]byte, len(v)) + copy(cv, v) + key := cacheKey{b, cacheKeyPostings(l)} + c.lru.Add(key, cv) + + c.added.WithLabelValues(cacheType).Inc() + c.currentSize.WithLabelValues(cacheType).Add(float64(entrySize)) + c.totalCurrentSize.WithLabelValues(cacheType).Add(float64(entrySize + key.size())) + c.current.WithLabelValues(cacheType).Inc() + c.curSize += entrySize +} + +func (c *IndexCache) Postings(b ulid.ULID, l labels.Label) ([]byte, bool) { + c.requests.WithLabelValues(cacheTypePostings).Inc() + + c.mtx.Lock() + defer c.mtx.Unlock() + + v, ok := c.lru.Get(cacheKey{b, cacheKeyPostings(l)}) + if !ok { + return nil, false + } + c.hits.WithLabelValues(cacheTypePostings).Inc() + return v.([]byte), true +} + +func (c *IndexCache) SetSeries(b ulid.ULID, id uint64, v []byte) { + var ( + entrySize = 16 + uint64(len(v)) // Slice header + bytes. + cacheType = cacheTypeSeries + ) + + c.mtx.Lock() + defer c.mtx.Unlock() + + if !c.ensureFits(entrySize, cacheType) { + c.overflow.WithLabelValues(cacheType).Inc() + return + } + + // The caller may be passing in a sub-slice of a huge array. Copy the data + // to ensure we don't waste huge amounts of space for something small. + cv := make([]byte, len(v)) + copy(cv, v) + key := cacheKey{b, cacheKeySeries(id)} + c.lru.Add(key, cv) + + c.added.WithLabelValues(cacheType).Inc() + c.currentSize.WithLabelValues(cacheType).Add(float64(entrySize)) + c.totalCurrentSize.WithLabelValues(cacheType).Add(float64(entrySize + key.size())) + c.current.WithLabelValues(cacheType).Inc() + c.curSize += entrySize +} + +func (c *IndexCache) Series(b ulid.ULID, id uint64) ([]byte, bool) { + c.requests.WithLabelValues(cacheTypeSeries).Inc() + + c.mtx.Lock() + defer c.mtx.Unlock() + + v, ok := c.lru.Get(cacheKey{b, cacheKeySeries(id)}) + if !ok { + return nil, false + } + c.hits.WithLabelValues(cacheTypeSeries).Inc() + return v.([]byte), true +} diff --git a/pkg/store/cache/cache_test.go b/pkg/store/cache/cache_test.go new file mode 100644 index 0000000000..e5fe113319 --- /dev/null +++ b/pkg/store/cache/cache_test.go @@ -0,0 +1,273 @@ +// Tests out the index cache implementation. +package storecache + +import ( + "math" + "testing" + "time" + + "github.com/fortytw2/leaktest" + "github.com/go-kit/kit/log" + "github.com/hashicorp/golang-lru/simplelru" + "github.com/improbable-eng/thanos/pkg/testutil" + "github.com/oklog/ulid" + "github.com/prometheus/client_golang/prometheus" + promtest "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/tsdb/labels" +) + +func TestIndexCache_AvoidsDeadlock(t *testing.T) { + defer leaktest.CheckTimeout(t, 10*time.Second)() + + metrics := prometheus.NewRegistry() + cache, err := NewIndexCache(log.NewNopLogger(), metrics, Opts{ + MaxItemSizeBytes: sliceHeaderSize + 5, + MaxSizeBytes: sliceHeaderSize + 5, + }) + testutil.Ok(t, err) + + l, err := simplelru.NewLRU(math.MaxInt64, func(key, val interface{}) { + cache.onEvict(key, val) + + // We hack LRU to add back entry on eviction to simulate broken evictions. + cache.lru.Add(key, val) + cache.curSize += sliceHeaderSize + uint64(len(val.([]byte))) // Slice header + bytes. + }) + testutil.Ok(t, err) + cache.lru = l + + cache.SetPostings(ulid.MustNew(0, nil), labels.Label{Name: "test2", Value: "1"}, []byte{42, 33, 14, 67, 11}) + + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypeSeries))) + + // This triggers deadlock logic. 
+	cache.SetPostings(ulid.MustNew(0, nil), labels.Label{Name: "test1", Value: "1"}, []byte{42})
+
+	testutil.Equals(t, float64(1), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypePostings)))
+	testutil.Equals(t, float64(0), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypeSeries)))
+}
+
+// This should not happen as we hardcode math.MaxInt64, but we still add a test to verify it.
+func TestIndexCache_MaxNumberOfItemsHit(t *testing.T) {
+	defer leaktest.CheckTimeout(t, 10*time.Second)()
+
+	metrics := prometheus.NewRegistry()
+	cache, err := NewIndexCache(log.NewNopLogger(), metrics, Opts{
+		MaxItemSizeBytes: 2*sliceHeaderSize + 10,
+		MaxSizeBytes:     2*sliceHeaderSize + 10,
+	})
+	testutil.Ok(t, err)
+
+	l, err := simplelru.NewLRU(2, cache.onEvict)
+	testutil.Ok(t, err)
+	cache.lru = l
+
+	id := ulid.MustNew(0, nil)
+
+	cache.SetPostings(id, labels.Label{Name: "test", Value: "123"}, []byte{42, 33})
+	cache.SetPostings(id, labels.Label{Name: "test", Value: "124"}, []byte{42, 33})
+	cache.SetPostings(id, labels.Label{Name: "test", Value: "125"}, []byte{42, 33})
+
+	testutil.Equals(t, uint64(2*sliceHeaderSize+4), cache.curSize)
+	testutil.Equals(t, float64(0), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypePostings)))
+	testutil.Equals(t, float64(0), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypeSeries)))
+	testutil.Equals(t, float64(1), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypePostings)))
+	testutil.Equals(t, float64(0), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypeSeries)))
+	testutil.Equals(t, float64(3), promtest.ToFloat64(cache.added.WithLabelValues(cacheTypePostings)))
+	testutil.Equals(t, float64(0), promtest.ToFloat64(cache.added.WithLabelValues(cacheTypeSeries)))
+	testutil.Equals(t, float64(0), promtest.ToFloat64(cache.requests.WithLabelValues(cacheTypePostings)))
+	testutil.Equals(t, float64(0), promtest.ToFloat64(cache.requests.WithLabelValues(cacheTypeSeries)))
+	testutil.Equals(t, float64(0), promtest.ToFloat64(cache.hits.WithLabelValues(cacheTypePostings)))
+	testutil.Equals(t, float64(0), promtest.ToFloat64(cache.hits.WithLabelValues(cacheTypeSeries)))
+}
+
+func TestIndexCache_Eviction_WithMetrics(t *testing.T) {
+	defer leaktest.CheckTimeout(t, 10*time.Second)()
+
+	metrics := prometheus.NewRegistry()
+	cache, err := NewIndexCache(log.NewNopLogger(), metrics, Opts{
+		MaxItemSizeBytes: 2*sliceHeaderSize + 5,
+		MaxSizeBytes:     2*sliceHeaderSize + 5,
+	})
+	testutil.Ok(t, err)
+
+	id := ulid.MustNew(0, nil)
+	lbls := labels.Label{Name: "test", Value: "123"}
+
+	_, ok := cache.Postings(id, lbls)
+	testutil.Assert(t, !ok, "no such key")
+
+	// Add sliceHeaderSize + 2 bytes. 
+ cache.SetPostings(id, lbls, []byte{42, 33}) + testutil.Equals(t, uint64(sliceHeaderSize+2), cache.curSize) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.current.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(sliceHeaderSize+2), promtest.ToFloat64(cache.currentSize.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(sliceHeaderSize+2+55), promtest.ToFloat64(cache.totalCurrentSize.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.current.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.currentSize.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.totalCurrentSize.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypeSeries))) + + p, ok := cache.Postings(id, lbls) + testutil.Assert(t, ok, "key exists") + testutil.Equals(t, []byte{42, 33}, p) + + _, ok = cache.Postings(ulid.MustNew(1, nil), lbls) + testutil.Assert(t, !ok, "no such key") + _, ok = cache.Postings(id, labels.Label{Name: "test", Value: "124"}) + testutil.Assert(t, !ok, "no such key") + + // Add sliceHeaderSize + 3 more bytes. + cache.SetSeries(id, 1234, []byte{222, 223, 224}) + testutil.Equals(t, uint64(2*sliceHeaderSize+5), cache.curSize) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.current.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(sliceHeaderSize+2), promtest.ToFloat64(cache.currentSize.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(sliceHeaderSize+2+55), promtest.ToFloat64(cache.totalCurrentSize.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.current.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(sliceHeaderSize+3), promtest.ToFloat64(cache.currentSize.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(sliceHeaderSize+3+24), promtest.ToFloat64(cache.totalCurrentSize.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypeSeries))) + + p, ok = cache.Series(id, 1234) + testutil.Assert(t, ok, "key exists") + testutil.Equals(t, []byte{222, 223, 224}, p) + + lbls2 := labels.Label{Name: "test", Value: "124"} + + // Add sliceHeaderSize + 5 + 16 bytes, should fully evict 2 last items. 
+ v := []byte{42, 33, 14, 67, 11} + for i := 0; i < sliceHeaderSize; i++ { + v = append(v, 3) + } + cache.SetPostings(id, lbls2, v) + + testutil.Equals(t, uint64(2*sliceHeaderSize+5), cache.curSize) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.current.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(2*sliceHeaderSize+5), promtest.ToFloat64(cache.currentSize.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(2*sliceHeaderSize+5+55), promtest.ToFloat64(cache.totalCurrentSize.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.current.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.currentSize.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.totalCurrentSize.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypePostings))) // Eviction. + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypeSeries))) // Eviction. + + // Evicted. + _, ok = cache.Postings(id, lbls) + testutil.Assert(t, !ok, "no such key") + _, ok = cache.Series(id, 1234) + testutil.Assert(t, !ok, "no such key") + + p, ok = cache.Postings(id, lbls2) + testutil.Assert(t, ok, "key exists") + testutil.Equals(t, v, p) + + // Add same item again. + // NOTE: In our caller code, we always check first hit, then we claim miss and set posting so this should not happen. + // That's why this case is not optimized and we evict + re add the item. + cache.SetPostings(id, lbls2, v) + + testutil.Equals(t, uint64(2*sliceHeaderSize+5), cache.curSize) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.current.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(2*sliceHeaderSize+5), promtest.ToFloat64(cache.currentSize.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(2*sliceHeaderSize+5+55), promtest.ToFloat64(cache.totalCurrentSize.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.current.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.currentSize.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.totalCurrentSize.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(2), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypePostings))) // Eviction. + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypeSeries))) + + p, ok = cache.Postings(id, lbls2) + testutil.Assert(t, ok, "key exists") + testutil.Equals(t, v, p) + + // Add too big item. 
+ cache.SetPostings(id, labels.Label{Name: "test", Value: "toobig"}, append(v, 5)) + testutil.Equals(t, uint64(2*sliceHeaderSize+5), cache.curSize) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.current.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(2*sliceHeaderSize+5), promtest.ToFloat64(cache.currentSize.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(2*sliceHeaderSize+5+55), promtest.ToFloat64(cache.totalCurrentSize.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.current.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.currentSize.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.totalCurrentSize.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypePostings))) // Overflow. + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(2), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypeSeries))) + + _, _, ok = cache.lru.RemoveOldest() + testutil.Assert(t, ok, "something to remove") + + testutil.Equals(t, uint64(0), cache.curSize) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.current.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.currentSize.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.totalCurrentSize.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.current.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.currentSize.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.totalCurrentSize.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(3), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypeSeries))) + + _, _, ok = cache.lru.RemoveOldest() + testutil.Assert(t, !ok, "nothing to remove") + + lbls3 := labels.Label{Name: "test", Value: "124"} + + cache.SetPostings(id, lbls3, []byte{}) + + testutil.Equals(t, uint64(sliceHeaderSize), cache.curSize) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.current.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(sliceHeaderSize), promtest.ToFloat64(cache.currentSize.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(sliceHeaderSize+55), promtest.ToFloat64(cache.totalCurrentSize.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.current.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.currentSize.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.totalCurrentSize.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypeSeries))) 
+ testutil.Equals(t, float64(3), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypeSeries))) + + p, ok = cache.Postings(id, lbls3) + testutil.Assert(t, ok, "key exists") + testutil.Equals(t, []byte{}, p) + + // nil works and still allocates empty slice. + lbls4 := labels.Label{Name: "test", Value: "125"} + cache.SetPostings(id, lbls4, []byte(nil)) + + testutil.Equals(t, 2*uint64(sliceHeaderSize), cache.curSize) + testutil.Equals(t, float64(2), promtest.ToFloat64(cache.current.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, 2*float64(sliceHeaderSize), promtest.ToFloat64(cache.currentSize.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, 2*float64(sliceHeaderSize+55), promtest.ToFloat64(cache.totalCurrentSize.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.current.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.currentSize.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.totalCurrentSize.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(0), promtest.ToFloat64(cache.overflow.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(3), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.evicted.WithLabelValues(cacheTypeSeries))) + + p, ok = cache.Postings(id, lbls4) + testutil.Assert(t, ok, "key exists") + testutil.Equals(t, []byte{}, p) + + // Other metrics. + testutil.Equals(t, float64(5), promtest.ToFloat64(cache.added.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.added.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(9), promtest.ToFloat64(cache.requests.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(2), promtest.ToFloat64(cache.requests.WithLabelValues(cacheTypeSeries))) + testutil.Equals(t, float64(5), promtest.ToFloat64(cache.hits.WithLabelValues(cacheTypePostings))) + testutil.Equals(t, float64(1), promtest.ToFloat64(cache.hits.WithLabelValues(cacheTypeSeries))) +} diff --git a/pkg/store/cache_test.go b/pkg/store/cache_test.go deleted file mode 100644 index 0c3d241668..0000000000 --- a/pkg/store/cache_test.go +++ /dev/null @@ -1,61 +0,0 @@ -// Tests out the index cache implementation. -package store - -import ( - "testing" - "time" - - "github.com/fortytw2/leaktest" - "github.com/improbable-eng/thanos/pkg/testutil" - "github.com/oklog/ulid" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/tsdb/labels" -) - -// TestIndexCacheEdge tests the index cache edge cases. -func TestIndexCacheEdge(t *testing.T) { - metrics := prometheus.NewRegistry() - cache, err := newIndexCache(metrics, 1) - testutil.Ok(t, err) - - fits := cache.ensureFits([]byte{42, 24}) - testutil.Equals(t, fits, false) - - fits = cache.ensureFits([]byte{42}) - testutil.Equals(t, fits, true) -} - -// TestIndexCacheSmoke runs the smoke tests for the index cache. 
-func TestIndexCacheSmoke(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - - metrics := prometheus.NewRegistry() - cache, err := newIndexCache(metrics, 20) - testutil.Ok(t, err) - - blid := ulid.ULID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}) - labels := labels.Label{Name: "test", Value: "123"} - - cache.setPostings(blid, labels, []byte{42}) - - p, ok := cache.postings(blid, labels) - testutil.Equals(t, ok, true) - testutil.Equals(t, p, []byte{42}) - testutil.Equals(t, cache.curSize, uint64(1)) - - cache.setSeries(blid, 1234, []byte{42, 42}) - - s, ok := cache.series(blid, 1234) - testutil.Equals(t, ok, true) - testutil.Equals(t, s, []byte{42, 42}) - testutil.Equals(t, cache.curSize, uint64(3)) - - cache.lru.RemoveOldest() - testutil.Equals(t, cache.curSize, uint64(2)) - - cache.lru.RemoveOldest() - testutil.Equals(t, cache.curSize, uint64(0)) - - cache.lru.RemoveOldest() - testutil.Equals(t, cache.curSize, uint64(0)) -} diff --git a/pkg/store/prometheus.go b/pkg/store/prometheus.go index e8fd677f53..d79c7a74ed 100644 --- a/pkg/store/prometheus.go +++ b/pkg/store/prometheus.go @@ -29,6 +29,14 @@ import ( "google.golang.org/grpc/status" ) +var statusToCode = map[int]codes.Code{ + http.StatusBadRequest: codes.InvalidArgument, + http.StatusNotFound: codes.NotFound, + http.StatusUnprocessableEntity: codes.Internal, + http.StatusServiceUnavailable: codes.Unavailable, + http.StatusInternalServerError: codes.Internal, +} + // PrometheusStore implements the store node API on top of the Prometheus remote read API. type PrometheusStore struct { logger log.Logger @@ -163,6 +171,7 @@ func (p *PrometheusStore) Series(r *storepb.SeriesRequest, s storepb.Store_Serie // XOR encoding supports a max size of 2^16 - 1 samples, so we need // to chunk all samples into groups of no more than 2^16 - 1 + // See: https://github.com/improbable-eng/thanos/pull/718 aggregatedChunks, err := p.chunkSamples(e, math.MaxUint16) if err != nil { return err @@ -179,14 +188,13 @@ func (p *PrometheusStore) Series(r *storepb.SeriesRequest, s storepb.Store_Serie return nil } -func (p *PrometheusStore) chunkSamples(series prompb.TimeSeries, samplesPerChunk int) ([]storepb.AggrChunk, error) { - var aggregatedChunks []storepb.AggrChunk +func (p *PrometheusStore) chunkSamples(series prompb.TimeSeries, maxSamplesPerChunk int) (chks []storepb.AggrChunk, err error) { samples := series.Samples for len(samples) > 0 { chunkSize := len(samples) - if chunkSize > samplesPerChunk { - chunkSize = samplesPerChunk + if chunkSize > maxSamplesPerChunk { + chunkSize = maxSamplesPerChunk } enc, cb, err := p.encodeChunk(samples[:chunkSize]) @@ -194,7 +202,7 @@ func (p *PrometheusStore) chunkSamples(series prompb.TimeSeries, samplesPerChunk return nil, status.Error(codes.Unknown, err.Error()) } - aggregatedChunks = append(aggregatedChunks, storepb.AggrChunk{ + chks = append(chks, storepb.AggrChunk{ MinTime: int64(samples[0].Timestamp), MaxTime: int64(samples[chunkSize-1].Timestamp), Raw: &storepb.Chunk{Type: enc, Data: cb}, @@ -203,7 +211,7 @@ func (p *PrometheusStore) chunkSamples(series prompb.TimeSeries, samplesPerChunk samples = samples[chunkSize:] } - return aggregatedChunks, nil + return chks, nil } func (p *PrometheusStore) promSeries(ctx context.Context, q prompb.Query) (*prompb.ReadResponse, error) { @@ -336,10 +344,52 @@ func extendLset(lset []storepb.Label, extend labels.Labels) []storepb.Label { } // LabelNames returns all known label names. 
-func (p *PrometheusStore) LabelNames(ctx context.Context, r *storepb.LabelNamesRequest) ( +func (p *PrometheusStore) LabelNames(ctx context.Context, _ *storepb.LabelNamesRequest) ( *storepb.LabelNamesResponse, error, ) { - return nil, status.Error(codes.Unimplemented, "not implemented") + u := *p.base + u.Path = path.Join(u.Path, "/api/v1/labels") + + req, err := http.NewRequest("GET", u.String(), nil) + if err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + + span, ctx := tracing.StartSpan(ctx, "/prom_label_names HTTP[client]") + defer span.Finish() + + resp, err := p.client.Do(req.WithContext(ctx)) + if err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + defer runutil.CloseWithLogOnErr(p.logger, resp.Body, "label names request body") + + if resp.StatusCode/100 != 2 { + return nil, status.Error(codes.Internal, fmt.Sprintf("request Prometheus server failed, code %s", resp.Status)) + } + + if resp.StatusCode == http.StatusNoContent { + return &storepb.LabelNamesResponse{Names: []string{}}, nil + } + + var m struct { + Data []string `json:"data"` + Status string `json:"status"` + Error string `json:"error"` + } + if err := json.NewDecoder(resp.Body).Decode(&m); err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + + if m.Status != "success" { + code, exists := statusToCode[resp.StatusCode] + if !exists { + return nil, status.Error(codes.Internal, m.Error) + } + return nil, status.Error(code, m.Error) + } + + return &storepb.LabelNamesResponse{Names: m.Data}, nil } // LabelValues returns all known label values for a given label name. @@ -356,7 +406,7 @@ func (p *PrometheusStore) LabelValues(ctx context.Context, r *storepb.LabelValue req, err := http.NewRequest("GET", u.String(), nil) if err != nil { - return nil, status.Error(codes.Unknown, err.Error()) + return nil, status.Error(codes.Internal, err.Error()) } span, ctx := tracing.StartSpan(ctx, "/prom_label_values HTTP[client]") @@ -364,17 +414,36 @@ func (p *PrometheusStore) LabelValues(ctx context.Context, r *storepb.LabelValue resp, err := p.client.Do(req.WithContext(ctx)) if err != nil { - return nil, status.Error(codes.Unknown, err.Error()) + return nil, status.Error(codes.Internal, err.Error()) } defer runutil.CloseWithLogOnErr(p.logger, resp.Body, "label values request body") + if resp.StatusCode/100 != 2 { + return nil, status.Error(codes.Internal, fmt.Sprintf("request Prometheus server failed, code %s", resp.Status)) + } + + if resp.StatusCode == http.StatusNoContent { + return &storepb.LabelValuesResponse{Values: []string{}}, nil + } + var m struct { - Data []string `json:"data"` + Data []string `json:"data"` + Status string `json:"status"` + Error string `json:"error"` } if err := json.NewDecoder(resp.Body).Decode(&m); err != nil { - return nil, status.Error(codes.Unknown, err.Error()) + return nil, status.Error(codes.Internal, err.Error()) } + sort.Strings(m.Data) + if m.Status != "success" { + code, exists := statusToCode[resp.StatusCode] + if !exists { + return nil, status.Error(codes.Internal, m.Error) + } + return nil, status.Error(code, m.Error) + } + return &storepb.LabelValuesResponse{Values: m.Data}, nil } diff --git a/pkg/store/prometheus_test.go b/pkg/store/prometheus_test.go index 44693411a7..b6c812ed17 100644 --- a/pkg/store/prometheus_test.go +++ b/pkg/store/prometheus_test.go @@ -13,6 +13,7 @@ import ( "github.com/improbable-eng/thanos/pkg/store/storepb" "github.com/improbable-eng/thanos/pkg/testutil" "github.com/prometheus/prometheus/pkg/timestamp" 
+ "github.com/prometheus/tsdb" "github.com/prometheus/tsdb/chunkenc" "github.com/prometheus/tsdb/labels" ) @@ -277,42 +278,24 @@ func TestPrometheusStore_Info(t *testing.T) { testutil.Equals(t, int64(456), resp.MaxTime) } -// Regression test for https://github.com/improbable-eng/thanos/issues/396. -func TestPrometheusStore_Series_SplitSamplesIntoChunksWithMaxSizeOfUint16_e2e(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - - p, err := testutil.NewPrometheus() - testutil.Ok(t, err) - +func testSeries_SplitSamplesIntoChunksWithMaxSizeOfUint16_e2e(t *testing.T, appender tsdb.Appender, newStore func() storepb.StoreServer) { baseT := timestamp.FromTime(time.Now().AddDate(0, 0, -2)) / 1000 * 1000 - a := p.Appender() - offset := int64(2*math.MaxUint16 + 5) for i := int64(0); i < offset; i++ { - _, err = a.Add(labels.FromStrings("a", "b"), baseT+i, 1) + _, err := appender.Add(labels.FromStrings("a", "b"), baseT+i, 1) testutil.Ok(t, err) } - testutil.Ok(t, a.Commit()) + testutil.Ok(t, appender.Commit()) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - testutil.Ok(t, p.Start()) - defer func() { testutil.Ok(t, p.Stop()) }() - - u, err := url.Parse(fmt.Sprintf("http://%s", p.Addr())) - testutil.Ok(t, err) - - proxy, err := NewPrometheusStore(nil, nil, u, component.Sidecar, - func() labels.Labels { - return labels.FromStrings("region", "eu-west") - }, nil) - testutil.Ok(t, err) + client := newStore() srv := newStoreSeriesServer(ctx) - testutil.Ok(t, proxy.Series(&storepb.SeriesRequest{ + testutil.Ok(t, client.Series(&storepb.SeriesRequest{ MinTime: baseT, MaxTime: baseT + offset, Matchers: []storepb.LabelMatcher{ @@ -344,3 +327,27 @@ func TestPrometheusStore_Series_SplitSamplesIntoChunksWithMaxSizeOfUint16_e2e(t testutil.Ok(t, err) testutil.Equals(t, 5, chunk.NumSamples()) } + +// Regression test for https://github.com/improbable-eng/thanos/issues/396. 
+func TestPrometheusStore_Series_SplitSamplesIntoChunksWithMaxSizeOfUint16_e2e(t *testing.T) { + defer leaktest.CheckTimeout(t, 10*time.Second)() + + p, err := testutil.NewPrometheus() + testutil.Ok(t, err) + defer func() { testutil.Ok(t, p.Stop()) }() + + testSeries_SplitSamplesIntoChunksWithMaxSizeOfUint16_e2e(t, p.Appender(), func() storepb.StoreServer { + testutil.Ok(t, p.Start()) + + u, err := url.Parse(fmt.Sprintf("http://%s", p.Addr())) + testutil.Ok(t, err) + + proxy, err := NewPrometheusStore(nil, nil, u, component.Sidecar, + func() labels.Labels { + return labels.FromStrings("region", "eu-west") + }, nil) + testutil.Ok(t, err) + + return proxy + }) +} diff --git a/pkg/store/proxy.go b/pkg/store/proxy.go index 9324d62247..b9fd49edf0 100644 --- a/pkg/store/proxy.go +++ b/pkg/store/proxy.go @@ -294,10 +294,6 @@ func startStreamSeriesSet( return } - if ctx.Err() != nil { - return - } - if err != nil { wrapErr := errors.Wrapf(err, "receive series from %s", s.name) if partialResponse { @@ -315,7 +311,14 @@ func startStreamSeriesSet( s.warnCh.send(storepb.NewWarnSeriesResponse(errors.New(w))) continue } - s.recvCh <- r.GetSeries() + + select { + case s.recvCh <- r.GetSeries(): + continue + case <-ctx.Done(): + return + } + } }() return s @@ -397,7 +400,48 @@ func storeMatches(s Client, mint, maxt int64, matchers ...storepb.LabelMatcher) func (s *ProxyStore) LabelNames(ctx context.Context, r *storepb.LabelNamesRequest) ( *storepb.LabelNamesResponse, error, ) { - return nil, status.Error(codes.Unimplemented, "not implemented") + var ( + warnings []string + names [][]string + mtx sync.Mutex + g, gctx = errgroup.WithContext(ctx) + ) + + for _, st := range s.stores() { + st := st + g.Go(func() error { + resp, err := st.LabelNames(gctx, &storepb.LabelNamesRequest{ + PartialResponseDisabled: r.PartialResponseDisabled, + }) + if err != nil { + err = errors.Wrapf(err, "fetch label names from store %s", st) + if r.PartialResponseDisabled { + return err + } + + mtx.Lock() + warnings = append(warnings, err.Error()) + mtx.Unlock() + return nil + } + + mtx.Lock() + warnings = append(warnings, resp.Warnings...) + names = append(names, resp.Names) + mtx.Unlock() + + return nil + }) + } + + if err := g.Wait(); err != nil { + return nil, err + } + + return &storepb.LabelNamesResponse{ + Names: strutil.MergeUnsortedSlices(names...), + Warnings: warnings, + }, nil } // LabelValues returns all known label values for a given label name. 
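The fan-out above collects each store's label names under a shared mutex and merges everything once after `g.Wait()` via `strutil.MergeUnsortedSlices`. Below is a minimal standalone sketch of the merge behavior the new `TestProxyStore_LabelNames` cases further down expect; `mergeLabelNames` is a hypothetical stand-in for illustration, not the actual strutil implementation:

```go
package main

import (
	"fmt"
	"sort"
)

// mergeLabelNames mimics the assumed behavior of strutil.MergeUnsortedSlices:
// combine the label-name lists returned by each store, drop duplicates, and
// return one sorted slice.
func mergeLabelNames(slices ...[]string) []string {
	seen := map[string]struct{}{}
	out := []string{}
	for _, s := range slices {
		for _, name := range s {
			if _, ok := seen[name]; ok {
				continue
			}
			seen[name] = struct{}{}
			out = append(out, name)
		}
	}
	sort.Strings(out)
	return out
}

func main() {
	// Mirrors the first test case below: {"a", "b"} and {"a", "c", "d"}
	// merge into [a b c d].
	fmt.Println(mergeLabelNames([]string{"a", "b"}, []string{"a", "c", "d"}))
}
```

Collecting raw slices under the mutex and merging once after `g.Wait()` keeps the per-response lock hold time small.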
@@ -415,7 +459,7 @@ func (s *ProxyStore) LabelValues(ctx context.Context, r *storepb.LabelValuesRequ store := st g.Go(func() error { resp, err := store.LabelValues(gctx, &storepb.LabelValuesRequest{ - Label: r.Label, + Label: r.Label, PartialResponseDisabled: r.PartialResponseDisabled, }) if err != nil { diff --git a/pkg/store/proxy_test.go b/pkg/store/proxy_test.go index ed1c1d3b0a..caaf6ff8bc 100644 --- a/pkg/store/proxy_test.go +++ b/pkg/store/proxy_test.go @@ -687,6 +687,113 @@ func TestProxyStore_LabelValues(t *testing.T) { testutil.Equals(t, 1, len(resp.Warnings)) } +func TestProxyStore_LabelNames(t *testing.T) { + defer leaktest.CheckTimeout(t, 10*time.Second)() + + for _, tc := range []struct { + title string + storeAPIs []Client + + req *storepb.LabelNamesRequest + + expectedNames []string + expectedErr error + expectedWarningsLen int + }{ + { + title: "label_names partial response disabled", + storeAPIs: []Client{ + &testClient{ + StoreClient: &mockedStoreAPI{ + RespLabelNames: &storepb.LabelNamesResponse{ + Names: []string{"a", "b"}, + }, + }, + }, + &testClient{ + StoreClient: &mockedStoreAPI{ + RespLabelNames: &storepb.LabelNamesResponse{ + Names: []string{"a", "c", "d"}, + }, + }, + }, + }, + req: &storepb.LabelNamesRequest{ + PartialResponseDisabled: true, + }, + expectedNames: []string{"a", "b", "c", "d"}, + expectedWarningsLen: 0, + }, + { + title: "label_names partial response disabled, but returns error", + storeAPIs: []Client{ + &testClient{ + StoreClient: &mockedStoreAPI{ + RespLabelNames: &storepb.LabelNamesResponse{ + Names: []string{"a", "b"}, + }, + }, + }, + &testClient{ + StoreClient: &mockedStoreAPI{ + RespError: errors.New("error!"), + }, + }, + }, + req: &storepb.LabelNamesRequest{ + PartialResponseDisabled: true, + }, + expectedErr: errors.New("fetch label names from store test: error!"), + }, + { + title: "label_names partial response enabled", + storeAPIs: []Client{ + &testClient{ + StoreClient: &mockedStoreAPI{ + RespLabelNames: &storepb.LabelNamesResponse{ + Names: []string{"a", "b"}, + }, + }, + }, + &testClient{ + StoreClient: &mockedStoreAPI{ + RespError: errors.New("error!"), + }, + }, + }, + req: &storepb.LabelNamesRequest{ + PartialResponseDisabled: false, + }, + expectedNames: []string{"a", "b"}, + expectedWarningsLen: 1, + }, + } { + if ok := t.Run(tc.title, func(t *testing.T) { + q := NewProxyStore( + nil, + func() []Client { return tc.storeAPIs }, + component.Query, + nil, + 0*time.Second, + ) + + ctx := context.Background() + resp, err := q.LabelNames(ctx, tc.req) + if tc.expectedErr != nil { + testutil.NotOk(t, err) + testutil.Equals(t, tc.expectedErr.Error(), err.Error()) + return + } + testutil.Ok(t, err) + + testutil.Equals(t, tc.expectedNames, resp.Names) + testutil.Equals(t, tc.expectedWarningsLen, len(resp.Warnings), "got %v", resp.Warnings) + }); !ok { + return + } + } +} + type rawSeries struct { lset []storepb.Label samples []sample @@ -830,11 +937,13 @@ func (s *storeSeriesServer) Context() context.Context { type mockedStoreAPI struct { RespSeries []*storepb.SeriesResponse RespLabelValues *storepb.LabelValuesResponse + RespLabelNames *storepb.LabelNamesResponse RespError error RespDuration time.Duration LastSeriesReq *storepb.SeriesRequest LastLabelValuesReq *storepb.LabelValuesRequest + LastLabelNamesReq *storepb.LabelNamesRequest } func (s *mockedStoreAPI) Info(ctx context.Context, req *storepb.InfoRequest, _ ...grpc.CallOption) (*storepb.InfoResponse, error) { @@ -848,7 +957,9 @@ func (s *mockedStoreAPI) Series(ctx 
context.Context, req *storepb.SeriesRequest, } func (s *mockedStoreAPI) LabelNames(ctx context.Context, req *storepb.LabelNamesRequest, _ ...grpc.CallOption) (*storepb.LabelNamesResponse, error) { - return nil, status.Error(codes.Unimplemented, "not implemented") + s.LastLabelNamesReq = req + + return s.RespLabelNames, s.RespError } func (s *mockedStoreAPI) LabelValues(ctx context.Context, req *storepb.LabelValuesRequest, _ ...grpc.CallOption) (*storepb.LabelValuesResponse, error) { diff --git a/pkg/store/tsdb.go b/pkg/store/tsdb.go index 1a5b5f820b..cad2450875 100644 --- a/pkg/store/tsdb.go +++ b/pkg/store/tsdb.go @@ -76,10 +76,6 @@ func (s *TSDBStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesSer return status.Error(codes.InvalidArgument, err.Error()) } - // TODO(fabxc): An improvement over this trivial approach would be to directly - // use the chunks provided by TSDB in the response. - // But since the sidecar has a similar approach, optimizing here has only - // limited benefit for now. q, err := s.db.Querier(r.MinTime, r.MaxTime) if err != nil { return status.Error(codes.Internal, err.Error()) @@ -96,13 +92,20 @@ func (s *TSDBStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesSer for set.Next() { series := set.At() - c, err := s.encodeChunk(series.Iterator()) + // TODO(fabxc): An improvement over this trivial approach would be to directly + // use the chunks provided by TSDB in the response. + // But since the sidecar has a similar approach, optimizing here has only + // limited benefit for now. + // NOTE: XOR encoding supports a max size of 2^16 - 1 samples, so we need + // to chunk all samples into groups of no more than 2^16 - 1 + // See: https://github.com/improbable-eng/thanos/pull/1038 + c, err := s.encodeChunks(series.Iterator(), math.MaxUint16) if err != nil { return status.Errorf(codes.Internal, "encode chunk: %s", err) } respSeries.Labels = s.translateAndExtendLabels(series.Labels(), s.labels) - respSeries.Chunks = append(respSeries.Chunks[:0], c) + respSeries.Chunks = append(respSeries.Chunks[:0], c...) if err := srv.Send(storepb.NewSeriesResponse(&respSeries)); err != nil { return status.Error(codes.Aborted, err.Error()) @@ -111,31 +114,46 @@ func (s *TSDBStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesSer return nil } -func (s *TSDBStore) encodeChunk(it tsdb.SeriesIterator) (storepb.AggrChunk, error) { - chk := chunkenc.NewXORChunk() +func (s *TSDBStore) encodeChunks(it tsdb.SeriesIterator, maxSamplesPerChunk int) (chks []storepb.AggrChunk, err error) { + var ( + chkMint int64 + chk *chunkenc.XORChunk + app chunkenc.Appender + isNext = it.Next() + ) + + for isNext { + if chk == nil { + chk = chunkenc.NewXORChunk() + app, err = chk.Appender() + if err != nil { + return nil, err + } + chkMint, _ = it.At() + } - app, err := chk.Appender() - if err != nil { - return storepb.AggrChunk{}, err - } - var mint int64 + app.Append(it.At()) + chkMaxt, _ := it.At() - for i := 0; it.Next(); i++ { - if i == 0 { - mint, _ = it.At() + isNext = it.Next() + if isNext && chk.NumSamples() < maxSamplesPerChunk { + continue } - app.Append(it.At()) + + // Cut the chunk. 
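+			// We reach this point either because the chunk already holds
+			// maxSamplesPerChunk samples or because the iterator is exhausted.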
+			chks = append(chks, storepb.AggrChunk{
+				MinTime: chkMint,
+				MaxTime: chkMaxt,
+				Raw:     &storepb.Chunk{Type: storepb.Chunk_XOR, Data: chk.Bytes()},
+			})
+			chk = nil
 	}
 
 	if it.Err() != nil {
-		return storepb.AggrChunk{}, errors.Wrap(it.Err(), "read series")
+		return nil, errors.Wrap(it.Err(), "read TSDB series")
 	}
 
-	maxt, _ := it.At()
-	return storepb.AggrChunk{
-		MinTime: mint,
-		MaxTime: maxt,
-		Raw:     &storepb.Chunk{Type: storepb.Chunk_XOR, Data: chk.Bytes()},
-	}, nil
+	return chks, nil
+
 }
 
 // translateAndExtendLabels transforms a metrics into a protobuf label set. It additionally
@@ -165,10 +183,20 @@ func (s *TSDBStore) translateAndExtendLabels(m, extend labels.Labels) []storepb.
 }
 
 // LabelNames returns all known label names.
-func (s *TSDBStore) LabelNames(ctx context.Context, r *storepb.LabelNamesRequest) (
+func (s *TSDBStore) LabelNames(ctx context.Context, _ *storepb.LabelNamesRequest) (
 	*storepb.LabelNamesResponse, error,
 ) {
-	return nil, status.Error(codes.Unimplemented, "not implemented")
+	q, err := s.db.Querier(math.MinInt64, math.MaxInt64)
+	if err != nil {
+		return nil, status.Error(codes.Internal, err.Error())
+	}
+	defer runutil.CloseWithLogOnErr(s.logger, q, "close tsdb querier label names")
+
+	res, err := q.LabelNames()
+	if err != nil {
+		return nil, status.Error(codes.Internal, err.Error())
+	}
+	return &storepb.LabelNamesResponse{Names: res}, nil
 }
 
 // LabelValues returns all known label values for a given label name.
diff --git a/pkg/store/tsdb_test.go b/pkg/store/tsdb_test.go
index 4334bedaea..345def101e 100644
--- a/pkg/store/tsdb_test.go
+++ b/pkg/store/tsdb_test.go
@@ -33,3 +33,18 @@ func TestTSDBStore_Info(t *testing.T) {
 	testutil.Equals(t, int64(0), resp.MinTime)
 	testutil.Equals(t, int64(math.MaxInt64), resp.MaxTime)
 }
+
+// Regression test for https://github.com/improbable-eng/thanos/issues/1038.
+func TestTSDBStore_Series_SplitSamplesIntoChunksWithMaxSizeOfUint16_e2e(t *testing.T) {
+	defer leaktest.CheckTimeout(t, 10*time.Second)()
+
+	db, err := testutil.NewTSDB()
+	testutil.Ok(t, err)
+	defer func() { testutil.Ok(t, db.Close()) }()
+
+	testSeries_SplitSamplesIntoChunksWithMaxSizeOfUint16_e2e(t, db.Appender(), func() storepb.StoreServer {
+		tsdbStore := NewTSDBStore(nil, nil, db, component.Rule, labels.FromStrings("region", "eu-west"))
+
+		return tsdbStore
+	})
+}
\ No newline at end of file
diff --git a/pkg/testutil/prometheus.go b/pkg/testutil/prometheus.go
index 3843068469..dce6866f4f 100644
--- a/pkg/testutil/prometheus.go
+++ b/pkg/testutil/prometheus.go
@@ -163,16 +163,16 @@ func (p *Prometheus) Start() error {
 		)
 	}
 	p.addr = fmt.Sprintf("localhost:%d", port)
-	p.cmd = exec.Command(
-		prometheusBin(p.version),
-		append([]string{
-			"--storage.tsdb.path=" + p.db.Dir(),
-			"--web.listen-address=" + p.addr,
-			"--web.route-prefix=" + p.prefix,
-			"--web.enable-admin-api",
-			"--config.file=" + filepath.Join(p.db.Dir(), "prometheus.yml"),
-		}, extra...)...,
-	)
+	args := append([]string{
+		"--storage.tsdb.retention=2d", // Pass retention because Prometheus since v2.8.0 no longer shows default values for these flags in its web UI/API: https://github.com/prometheus/prometheus/pull/5433
+		"--storage.tsdb.path=" + p.db.Dir(),
+		"--web.listen-address=" + p.addr,
+		"--web.route-prefix=" + p.prefix,
+		"--web.enable-admin-api",
+		"--config.file=" + filepath.Join(p.db.Dir(), "prometheus.yml"),
+	}, extra...)
+
+	p.cmd = exec.Command(prometheusBin(p.version), args...)
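+	// The goroutine below (pre-existing) waits on CombinedOutput and surfaces
+	// the output on stderr only when the Prometheus process exits with an error.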
go func() { if b, err := p.cmd.CombinedOutput(); err != nil { fmt.Fprintln(os.Stderr, "running Prometheus failed", err) @@ -231,7 +231,7 @@ func (p *Prometheus) SetConfig(s string) (err error) { if err != nil { return err } - defer runutil.CloseWithErrCapture(nil, &err, f, "prometheus config") + defer runutil.CloseWithErrCapture(&err, f, "prometheus config") _, err = f.Write([]byte(s)) return err @@ -264,6 +264,7 @@ func (p *Prometheus) Appender() tsdb.Appender { // CreateBlock writes a block with the given series and numSamples samples each. // Samples will be in the time range [mint, maxt). func CreateBlock( + ctx context.Context, dir string, series []labels.Labels, numSamples int, @@ -271,11 +272,12 @@ func CreateBlock( extLset labels.Labels, resolution int64, ) (id ulid.ULID, err error) { - return createBlock(dir, series, numSamples, mint, maxt, extLset, resolution, false) + return createBlock(ctx, dir, series, numSamples, mint, maxt, extLset, resolution, false) } // CreateBlockWithTombstone is same as CreateBlock but leaves tombstones which mimics the Prometheus local block. func CreateBlockWithTombstone( + ctx context.Context, dir string, series []labels.Labels, numSamples int, @@ -283,10 +285,11 @@ func CreateBlockWithTombstone( extLset labels.Labels, resolution int64, ) (id ulid.ULID, err error) { - return createBlock(dir, series, numSamples, mint, maxt, extLset, resolution, true) + return createBlock(ctx, dir, series, numSamples, mint, maxt, extLset, resolution, true) } func createBlock( + ctx context.Context, dir string, series []labels.Labels, numSamples int, @@ -299,7 +302,7 @@ func createBlock( if err != nil { return id, errors.Wrap(err, "create head block") } - defer runutil.CloseWithErrCapture(log.NewNopLogger(), &err, h, "TSDB Head") + defer runutil.CloseWithErrCapture(&err, h, "TSDB Head") var g errgroup.Group var timeStepSize = (maxt - mint) / int64(numSamples+1) @@ -340,7 +343,7 @@ func createBlock( if err := g.Wait(); err != nil { return id, err } - c, err := tsdb.NewLeveledCompactor(nil, log.NewNopLogger(), []int64{maxt - mint}, nil) + c, err := tsdb.NewLeveledCompactor(ctx, nil, log.NewNopLogger(), []int64{maxt - mint}, nil) if err != nil { return id, errors.Wrap(err, "create compactor") } diff --git a/pkg/ui/bindata.go b/pkg/ui/bindata.go index 5d3444887d..f245abf856 100644 --- a/pkg/ui/bindata.go +++ b/pkg/ui/bindata.go @@ -122,7 +122,7 @@ func pkgUiTemplates_baseHtml() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/templates/_base.html", size: 1065, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/templates/_base.html", size: 1065, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -142,7 +142,7 @@ func pkgUiTemplatesAlertsHtml() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/templates/alerts.html", size: 2534, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/templates/alerts.html", size: 2534, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -162,7 +162,7 @@ func pkgUiTemplatesFlagsHtml() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/templates/flags.html", size: 433, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/templates/flags.html", size: 433, mode: os.FileMode(420), modTime: 
time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -182,7 +182,7 @@ func pkgUiTemplatesGraphHtml() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/templates/graph.html", size: 2061, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/templates/graph.html", size: 2061, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -202,7 +202,7 @@ func pkgUiTemplatesQuery_menuHtml() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/templates/query_menu.html", size: 1479, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/templates/query_menu.html", size: 1479, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -222,7 +222,7 @@ func pkgUiTemplatesRule_menuHtml() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/templates/rule_menu.html", size: 1021, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/templates/rule_menu.html", size: 1021, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -242,7 +242,7 @@ func pkgUiTemplatesRulesHtml() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/templates/rules.html", size: 1103, mode: os.FileMode(420), modTime: time.Unix(1551875947, 0)} + info := bindataFileInfo{name: "pkg/ui/templates/rules.html", size: 1103, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -262,12 +262,12 @@ func pkgUiTemplatesStatusHtml() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/templates/status.html", size: 1286, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/templates/status.html", size: 1286, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } -var _pkgUiTemplatesStoresHtml = 
[]byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x9c\x55\x41\x6b\xe3\x3c\x10\xbd\xe7\x57\x0c\xe2\x3b\x7e\x89\xa1\x97\x85\xc5\xc9\xb2\x2c\x85\x3d\xb4\x65\xa1\xdd\x5e\x17\xc5\x9a\xc4\xa2\x8a\x64\x34\xe3\x36\x41\xe8\xbf\x2f\x72\xec\xc4\x6e\x9c\x34\xdd\x8b\xb1\x66\x9e\x34\x6f\x66\x9e\x46\x21\x28\x5c\x69\x8b\x20\x4a\x94\x4a\xc4\x38\xc9\x8d\xb6\x2f\xc0\xbb\x0a\xe7\x82\x71\xcb\x59\x41\x24\xc0\xa3\x99\x0b\xe2\x9d\x41\x2a\x11\x59\x40\xe9\x71\x35\x17\x21\x40\x25\xb9\xfc\xe5\x71\xa5\xb7\x10\x63\x46\x2c\x59\x17\x69\x4f\xe6\x6b\x83\x34\x2b\x88\xbe\xbd\xce\x43\x80\x65\xad\x8d\x7a\x46\x4f\xda\x59\x88\x51\x2c\x26\x21\xa0\x55\x31\x4e\x26\x47\x12\x85\xb3\x8c\x96\x1b\x1e\x4a\xbf\x42\x61\x24\xd1\xbc\x31\x4b\x6d\xd1\x4f\x57\xa6\xd6\x4a\x2c\x26\x00\x00\x21\x78\x69\xd7\x08\xff\x11\x3b\x8f\x4f\xbb\x0a\xff\x6f\xff\x09\xbe\xce\x61\x16\x63\x0b\xd3\xab\x1e\xa6\xb5\xe6\xe5\xcd\x22\x84\xa3\x79\xf6\xc8\x5e\xdb\x75\x8c\x79\x56\xde\x74\xe7\xa3\xa1\x3e\xfe\xb7\x7d\xb1\xee\xcd\x42\xc2\x0f\x60\x4d\x1a\x0d\x8a\xe5\xd2\x60\x47\x7b\xbf\x68\xbe\xd3\xa5\xf3\x0a\x3d\x76\xdc\xf7\xe0\x54\xf3\xfe\xda\x1f\x17\x2d\x60\x71\x6b\x55\xe5\xb4\xe5\x3c\xe3\xf2\xd4\xfb\xc8\x92\x6b\x1a\xf7\x7d\xb7\xd6\xd5\xb6\x40\x05\x77\x72\x89\xe6\x0c\xea\x5e\x5b\x78\xd2\x1b\x3c\xe3\x95\xdb\x0b\xde\x3b\x49\x0c\x3f\x51\x1a\x2e\xe1\x47\x89\xc5\xcb\x05\xd8\x3d\x12\xc9\xf5\xbb\x83\xf2\xac\x9f\x72\xf2\xbd\x2b\xc8\xd2\xa9\xdd\x71\x3d\x6c\x78\xea\x71\xdb\xee\xb6\xfa\x67\x8a\xa8\x0e\x8d\x9e\x3d\xc8\x0d\xa6\x16\xb3\x3a\x01\x75\x4d\x4b\x0a\x46\x31\x74\x43\x27\x23\xeb\xb8\x8d\x39\x4b\x59\xdd\x7a\xef\x7c\x2f\xf8\xe1\x38\xaa\xa4\xed\x0e\x94\x06\x3d\x43\xf3\x9d\x52\x5d\x14\x48\x04\x4d\x90\x3f\xda\x2a\x5d\x48\x76\x1e\xd2\x45\x9b\xd6\x55\x85\xbe\x90\x34\x16\xbd\xae\x4e\x83\x64\x29\xca\x18\xd1\x9e\x6c\xaf\x62\xa5\x52\x55\xfd\xe7\x49\x29\xf7\x66\x3f\x43\xeb\x70\x4d\x8e\xd8\x91\x46\x0c\x0d\x87\x9e\x9b\x24\xe2\x63\xcf\x67\x7b\x51\x7f\x94\xe6\x7e\x57\xf3\x9d\x56\x5e\x6f\xa4\xdf\x89\x24\x87\xc6\xd2\xca\x21\x8d\xb1\xd6\xf0\x2c\x4d\x8d\x31\x8a\xb1\x24\xae\x4f\x20\x84\x95\xf3\x1b\xc9\xe9\xe6\x10\xcb\x4d\xd5\x71\xbe\xd7\x36\xd9\xce\x28\xf0\xc2\x3e\xb9\xbd\xbc\x8f\xb4\x2d\xb0\xaf\xcc\xe6\x3a\xc6\x08\x72\xed\xae\x28\x32\x0c\xc7\xe4\x45\x6d\x9f\x94\xf8\x63\x25\x8d\x48\x67\x1f\xf1\xda\x70\xff\x2c\xa9\xe1\x80\x39\xb9\x19\x63\xc3\x02\x0a\x67\x52\xb8\xb9\xf8\x32\xc2\xfb\xc1\x41\xfb\xc2\x78\x5c\x6b\xe2\x34\xd2\x3f\x13\x7f\xc0\x37\xcf\x7a\x03\x2e\xcf\x9a\x87\x62\xe4\xe9\x69\xbc\xcb\xfe\xa0\xec\x3d\x8d\xfd\xea\xbf\x49\x6f\xb5\x5d\x8b\xc5\x18\xcb\x3c\x53\xfa\x75\xf8\x62\xb5\xa6\x6e\xf9\x37\x00\x00\xff\xff\x71\x7e\x93\x06\x0c\x08\x00\x00") +var _pkgUiTemplatesStoresHtml = 
[]byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x9c\x55\x51\x6b\xeb\x3a\x0c\x7e\xef\xaf\x10\x61\xaf\x6d\x60\x2f\x17\x2e\x69\x2f\x97\xcb\xe0\x3e\x6c\xe3\x40\x77\xf6\x7a\x70\x6d\xb5\x31\x73\xed\x60\x29\x5b\x8b\xf1\x7f\x3f\x38\x4d\xda\x64\x4d\xbb\xee\xbc\x04\x2c\x7d\x8a\x3e\x49\x9f\xe5\x10\x14\xae\xb5\x45\xc8\x4a\x14\x2a\x8b\x71\x52\x18\x6d\xdf\x80\xf7\x15\xce\x33\xc6\x1d\xe7\x92\x28\x03\x8f\x66\x9e\x11\xef\x0d\x52\x89\xc8\x19\x94\x1e\xd7\xf3\x2c\x04\xa8\x04\x97\x3f\x3c\xae\xf5\x0e\x62\xcc\x89\x05\x6b\x99\x62\x72\x5f\x1b\xa4\x99\x24\xfa\xe7\x7d\x1e\x02\xac\x6a\x6d\xd4\x2b\x7a\xd2\xce\x42\x8c\xd9\x62\x12\x02\x5a\x15\xe3\x64\x72\x22\x21\x9d\x65\xb4\xdc\xf0\x50\xfa\x1d\xa4\x11\x44\xf3\xc6\x2c\xb4\x45\x3f\x5d\x9b\x5a\xab\x6c\x31\x01\x00\x08\xc1\x0b\xbb\x41\xb8\x23\x76\x1e\x5f\xf6\x15\xc2\xdf\x73\x98\x2d\x5d\xed\x25\x52\x8c\x2d\x48\xaf\x7b\x88\xd6\x5a\x94\xf7\x8b\x10\x58\xb3\xe9\x87\xcf\x96\xec\xb5\xdd\xc4\x58\xe4\xe5\x7d\x97\x03\x0d\xf5\xa3\x7e\xda\x37\xeb\x3e\x2c\x24\xfc\x00\xd6\x94\xd2\xa0\x58\xac\x0c\x76\xd4\x0f\x87\xe6\x3b\x5d\x39\xaf\xd0\x63\xc7\xff\x00\x4e\x7d\xef\x9f\xfd\xe9\xd0\x02\x16\x0f\x56\x55\x4e\x5b\x2e\x72\x2e\xcf\xbd\x4b\x16\x5c\xd3\xb8\xef\x5f\x6b\x5d\x6d\x25\x2a\x78\x14\x2b\x34\x17\x50\x4f\xda\xc2\x8b\xde\xe2\x05\xaf\xd8\x5d\xf1\x3e\x0a\x62\xf8\x1f\x85\xe1\x12\xfe\x2b\x51\xbe\x5d\x81\x3d\x21\x91\xd8\x7c\xfa\x51\x91\xf7\x4b\x4e\xbe\x4f\x0d\x59\x39\xb5\x3f\x9d\x87\x43\x4f\x03\xd7\x56\xe1\x0e\xee\x66\xcb\x64\xa0\xf3\x59\x5f\x68\xab\x5a\x84\x70\xc0\xce\x9e\xc5\x16\xd3\xd0\x59\x9d\x81\xba\x31\x26\x5d\x63\x36\x74\x43\x27\x2f\xeb\xb8\x4d\x3b\x4b\x75\x3e\x78\xef\x7c\x2f\xf9\xf1\x77\x54\x09\xdb\xfd\x50\x18\xf4\x0c\xcd\x77\x4a\xb5\x94\x48\x04\x4d\x92\x5f\xda\x2a\x2d\x05\x3b\x0f\xe9\xfa\x4d\xeb\xaa\x42\x2f\x05\x8d\x65\xaf\xab\xf3\x24\x79\xca\x32\x46\xb4\x27\xe4\x9b\x58\xa9\xd4\x67\xff\x7d\x52\xca\x7d\xd8\xef\xd0\x3a\x5e\x9c\x13\x76\x64\x10\x43\xc3\x51\x05\x26\xc9\x3a\xa9\xe0\xd8\xff\x24\xf3\xaf\xca\x3c\x44\x35\xdf\x69\xe5\xf5\x56\xf8\x7d\x96\xe4\xd0\x58\x5a\x39\xa4\xe5\xd6\x1a\x5e\x85\xa9\x31\xc6\x6c\xac\x88\xdb\x0b\x08\x61\xed\xfc\x56\x70\xba\x4b\xc4\x62\x5b\x75\x9c\x9f\xb4\x4d\xb6\x0b\x0a\xbc\x12\x27\x76\xd7\xe3\x48\x5b\x89\x7d\x65\x36\x17\x34\x46\x10\x1b\x77\x43\x93\x61\xb8\x3e\xaf\x6a\xfb\xac\xc5\x5f\x2b\x69\x44\x3a\x87\x8c\xb7\xa6\xfb\x63\x49\x0d\x57\xce\xd9\xcd\x18\x5b\x16\x20\x9d\x49\xe9\xe6\xd9\x5f\x23\xbc\x9f\x1d\xd0\x61\xfb\x78\xdc\x68\xe2\xb4\xe4\xbf\x93\x7f\xc0\xb7\xc8\x7b\x2b\xaf\xc8\x9b\xa7\x63\xe4\x31\x6a\xbc\xab\xfe\xea\xec\x3d\x98\xfd\xee\x7f\x08\x6f\xb5\xdd\x64\x8b\x31\x96\x45\xae\xf4\xfb\xf0\x0d\x6b\x4d\xdd\xf1\x77\x00\x00\x00\xff\xff\x68\x27\x86\x45\x22\x08\x00\x00") func pkgUiTemplatesStoresHtmlBytes() ([]byte, error) { return bindataRead( @@ -282,7 +282,7 @@ func pkgUiTemplatesStoresHtml() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/templates/stores.html", size: 2060, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/templates/stores.html", size: 2082, mode: os.FileMode(420), modTime: time.Unix(1552832748, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -302,7 +302,7 @@ func pkgUiStaticCssAlertsCss() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/css/alerts.css", size: 383, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/css/alerts.css", size: 383, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -322,7 +322,7 @@ 
func pkgUiStaticCssGraphCss() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/css/graph.css", size: 3363, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/css/graph.css", size: 3363, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -342,7 +342,7 @@ func pkgUiStaticCssPrometheusCss() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/css/prometheus.css", size: 322, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/css/prometheus.css", size: 322, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -362,7 +362,7 @@ func pkgUiStaticCssRulesCss() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/css/rules.css", size: 190, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/css/rules.css", size: 190, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -382,7 +382,7 @@ func pkgUiStaticImgAjaxLoaderGif() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/img/ajax-loader.gif", size: 847, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/img/ajax-loader.gif", size: 847, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -402,7 +402,7 @@ func pkgUiStaticImgFaviconIco() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/img/favicon.ico", size: 15886, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/img/favicon.ico", size: 15886, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -422,7 +422,7 @@ func pkgUiStaticJsAlertsJs() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/js/alerts.js", size: 1152, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/js/alerts.js", size: 1152, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -442,7 +442,7 @@ func pkgUiStaticJsGraphJs() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/js/graph.js", size: 32282, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/js/graph.js", size: 32282, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -462,7 +462,7 @@ func pkgUiStaticJsGraph_templateHandlebar() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/js/graph_template.handlebar", size: 7611, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/js/graph_template.handlebar", size: 7611, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -482,7 +482,7 @@ func pkgUiStaticVendorBootstrap331CssBootstrapThemeMinCss() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap-3.3.1/css/bootstrap-theme.min.css", size: 19835, mode: 
os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap-3.3.1/css/bootstrap-theme.min.css", size: 19835, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -502,7 +502,7 @@ func pkgUiStaticVendorBootstrap331CssBootstrapMinCss() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap-3.3.1/css/bootstrap.min.css", size: 113498, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap-3.3.1/css/bootstrap.min.css", size: 113498, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -522,7 +522,7 @@ func pkgUiStaticVendorBootstrap331FontsGlyphiconsHalflingsRegularEot() (*asset, return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap-3.3.1/fonts/glyphicons-halflings-regular.eot", size: 20335, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap-3.3.1/fonts/glyphicons-halflings-regular.eot", size: 20335, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -542,7 +542,7 @@ func pkgUiStaticVendorBootstrap331FontsGlyphiconsHalflingsRegularSvg() (*asset, return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap-3.3.1/fonts/glyphicons-halflings-regular.svg", size: 62926, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap-3.3.1/fonts/glyphicons-halflings-regular.svg", size: 62926, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -562,7 +562,7 @@ func pkgUiStaticVendorBootstrap331FontsGlyphiconsHalflingsRegularTtf() (*asset, return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap-3.3.1/fonts/glyphicons-halflings-regular.ttf", size: 41280, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap-3.3.1/fonts/glyphicons-halflings-regular.ttf", size: 41280, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -582,7 +582,7 @@ func pkgUiStaticVendorBootstrap331FontsGlyphiconsHalflingsRegularWoff() (*asset, return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap-3.3.1/fonts/glyphicons-halflings-regular.woff", size: 23320, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap-3.3.1/fonts/glyphicons-halflings-regular.woff", size: 23320, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -602,7 +602,7 @@ func pkgUiStaticVendorBootstrap331JsBootstrapMinJs() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap-3.3.1/js/bootstrap.min.js", size: 35601, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap-3.3.1/js/bootstrap.min.js", size: 35601, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -622,7 +622,7 @@ func pkgUiStaticVendorBootstrap331JsNpmJs() (*asset, error) { return nil, err } - info := 
bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap-3.3.1/js/npm.js", size: 484, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap-3.3.1/js/npm.js", size: 484, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -642,7 +642,7 @@ func pkgUiStaticVendorBootstrap3TypeaheadBootstrap3TypeaheadMinJs() (*asset, err return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap3-typeahead/bootstrap3-typeahead.min.js", size: 7856, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/bootstrap3-typeahead/bootstrap3-typeahead.min.js", size: 7856, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -662,7 +662,7 @@ func pkgUiStaticVendorEonasdanBootstrapDatetimepickerBootstrapDatetimepickerMinC return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/eonasdan-bootstrap-datetimepicker/bootstrap-datetimepicker.min.css", size: 7771, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/eonasdan-bootstrap-datetimepicker/bootstrap-datetimepicker.min.css", size: 7771, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -682,7 +682,7 @@ func pkgUiStaticVendorEonasdanBootstrapDatetimepickerBootstrapDatetimepickerMinJ return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/eonasdan-bootstrap-datetimepicker/bootstrap-datetimepicker.min.js", size: 48881, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/eonasdan-bootstrap-datetimepicker/bootstrap-datetimepicker.min.js", size: 48881, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -702,7 +702,7 @@ func pkgUiStaticVendorFuzzyFuzzyJs() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/fuzzy/fuzzy.js", size: 5669, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/fuzzy/fuzzy.js", size: 5669, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -722,7 +722,7 @@ func pkgUiStaticVendorJsJqueryHotkeysJs() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/js/jquery.hotkeys.js", size: 4490, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/js/jquery.hotkeys.js", size: 4490, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -742,7 +742,7 @@ func pkgUiStaticVendorJsJqueryMinJs() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/js/jquery.min.js", size: 86671, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/js/jquery.min.js", size: 86671, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -762,7 +762,7 @@ func pkgUiStaticVendorJsJquerySelectionJs() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/js/jquery.selection.js", size: 12881, mode: os.FileMode(420), modTime: 
time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/js/jquery.selection.js", size: 12881, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -782,7 +782,7 @@ func pkgUiStaticVendorMomentMomentTimezoneWithDataMinJs() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/moment/moment-timezone-with-data.min.js", size: 184190, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/moment/moment-timezone-with-data.min.js", size: 184190, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -802,7 +802,7 @@ func pkgUiStaticVendorMomentMomentMinJs() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/moment/moment.min.js", size: 61281, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/moment/moment.min.js", size: 61281, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -822,7 +822,7 @@ func pkgUiStaticVendorMustacheMustacheMinJs() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/mustache/mustache.min.js", size: 9528, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/mustache/mustache.min.js", size: 9528, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -842,7 +842,7 @@ func pkgUiStaticVendorRickshawRickshawMinCss() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/rickshaw/rickshaw.min.css", size: 6102, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/rickshaw/rickshaw.min.css", size: 6102, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -862,7 +862,7 @@ func pkgUiStaticVendorRickshawRickshawMinJs() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/rickshaw/rickshaw.min.js", size: 76322, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/rickshaw/rickshaw.min.js", size: 76322, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -882,7 +882,7 @@ func pkgUiStaticVendorRickshawVendorD3LayoutMinJs() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/rickshaw/vendor/d3.layout.min.js", size: 17514, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/rickshaw/vendor/d3.layout.min.js", size: 17514, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -902,7 +902,7 @@ func pkgUiStaticVendorRickshawVendorD3V3Js() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "pkg/ui/static/vendor/rickshaw/vendor/d3.v3.js", size: 144718, mode: os.FileMode(420), modTime: time.Unix(1551875090, 0)} + info := bindataFileInfo{name: "pkg/ui/static/vendor/rickshaw/vendor/d3.v3.js", size: 144718, mode: os.FileMode(420), modTime: time.Unix(1552065161, 0)} a := &asset{bytes: bytes, info: info} return a, nil } diff --git a/pkg/ui/query.go 
b/pkg/ui/query.go index 969f07e596..fdf9d750b5 100644 --- a/pkg/ui/query.go +++ b/pkg/ui/query.go @@ -5,6 +5,7 @@ import ( "net/http" "os" "path" + "sort" "strings" "time" @@ -122,7 +123,28 @@ func (q *Query) stores(w http.ResponseWriter, r *http.Request) { for _, status := range q.storeSet.GetStoreStatus() { statuses[status.StoreType] = append(statuses[status.StoreType], status) } - q.executeTemplate(w, "stores.html", prefix, statuses) + + sources := make([]component.StoreAPI, 0, len(statuses)) + for k := range statuses { + sources = append(sources, k) + } + sort.Slice(sources, func(i int, j int) bool { + if sources[i] == nil { + return false + } + if sources[j] == nil { + return true + } + return sources[i].String() < sources[j].String() + }) + + q.executeTemplate(w, "stores.html", prefix, struct { + Stores map[component.StoreAPI][]query.StoreStatus + Sources []component.StoreAPI + }{ + Stores: statuses, + Sources: sources, + }) } func (q *Query) flags(w http.ResponseWriter, r *http.Request) { diff --git a/pkg/ui/templates/stores.html b/pkg/ui/templates/stores.html index 7750a1d1a0..a499cc16b7 100644 --- a/pkg/ui/templates/stores.html +++ b/pkg/ui/templates/stores.html @@ -4,9 +4,9 @@ {{define "content"}}
- {{range $storeType, $stores := .}} + {{range $storeType := .Sources}} {{if $storeType}} -

{{$storeType.String}}

+

{{title $storeType.String}}

{{else}}

Unknown Type

{{end}} @@ -23,7 +23,7 @@

Unknown Type

- {{range $store := $stores}} + {{range $store := index $.Stores $storeType}} {{$store.Name}} diff --git a/pkg/verifier/overlapped_blocks.go b/pkg/verifier/overlapped_blocks.go index 072fe54aec..af28fccb0c 100644 --- a/pkg/verifier/overlapped_blocks.go +++ b/pkg/verifier/overlapped_blocks.go @@ -2,6 +2,8 @@ package verifier import ( "context" + "sort" + "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/improbable-eng/thanos/pkg/block" @@ -10,7 +12,6 @@ import ( "github.com/oklog/ulid" "github.com/pkg/errors" "github.com/prometheus/tsdb" - "sort" ) const OverlappedBlocksIssueID = "overlapped_blocks" diff --git a/pkg/verifier/safe_delete.go b/pkg/verifier/safe_delete.go index 94383e07e2..cfea10c0df 100644 --- a/pkg/verifier/safe_delete.go +++ b/pkg/verifier/safe_delete.go @@ -2,9 +2,9 @@ package verifier import ( "context" - "fmt" "io/ioutil" "os" + "path/filepath" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" @@ -31,13 +31,18 @@ func SafeDelete(ctx context.Context, logger log.Logger, bkt objstore.Bucket, bac return errors.Errorf("%s dir seems to exists in backup bucket. Remove this block manually if you are sure it is safe to do", id) } - dir, err := ioutil.TempDir("", fmt.Sprintf("safe-delete-%s", id)) + tempdir, err := ioutil.TempDir("", "safe-delete") + if err != nil { + return err + } + dir := filepath.Join(tempdir, id.String()) + err = os.Mkdir(dir, 0755) if err != nil { return err } defer func() { - if err := os.RemoveAll(dir); err != nil { - level.Warn(logger).Log("msg", "failed to delete dir", "dir", dir, "err", err) + if err := os.RemoveAll(tempdir); err != nil { + level.Warn(logger).Log("msg", "failed to delete dir", "dir", tempdir, "err", err) } }() diff --git a/scripts/quickstart.sh b/scripts/quickstart.sh index 334d73f2da..6565571165 100755 --- a/scripts/quickstart.sh +++ b/scripts/quickstart.sh @@ -139,7 +139,9 @@ then --log.level debug \ --tsdb.path "./data/remote-write-receive-data" \ --grpc-address 0.0.0.0:19891 \ - --http-address 0.0.0.0:19691 \ + --http-address 0.0.0.0:18091 \ + --labels "receive=\"true\"" \ + ${OBJSTORECFG} \ --remote-write.address 0.0.0.0:19291 & mkdir -p "data/local-prometheus-data/" diff --git a/scripts/websitepreprocess.sh b/scripts/websitepreprocess.sh new file mode 100755 index 0000000000..13ca5cb27a --- /dev/null +++ b/scripts/websitepreprocess.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash + +if ! [[ "$0" =~ "scripts/websitepreprocess.sh" ]]; then + echo "must be run from repository root" + exit 255 +fi + +WEBSITE_DIR="website" +ORIGINAL_CONTENT_DIR="docs" +OUTPUT_CONTENT_DIR="${WEBSITE_DIR}/docs-pre-processed" +COMMIT_SHA=`git rev-parse HEAD` + +rm -rf ${OUTPUT_CONTENT_DIR} +mkdir -p ${OUTPUT_CONTENT_DIR} + +# 1. Copy original content. +cp -r ${ORIGINAL_CONTENT_DIR}/* ${OUTPUT_CONTENT_DIR} + +# 2. Add headers to special CODE_OF_CONDUCT.md, CONTRIBUTING.md and CHANGELOG.md files. +echo "$(cat < ${OUTPUT_CONTENT_DIR}/CODE_OF_CONDUCT.md +tail -n +2 CODE_OF_CONDUCT.md >> ${OUTPUT_CONTENT_DIR}/CODE_OF_CONDUCT.md + +echo "$(cat < ${OUTPUT_CONTENT_DIR}/CONTRIBUTING.md +tail -n +2 CONTRIBUTING.md >> ${OUTPUT_CONTENT_DIR}/CONTRIBUTING.md + +echo "$(cat < ${OUTPUT_CONTENT_DIR}/CHANGELOG.md +tail -n +2 CHANGELOG.md >> ${OUTPUT_CONTENT_DIR}/CHANGELOG.md + +ALL_DOC_CONTENT_FILES=`echo "${OUTPUT_CONTENT_DIR}/**/*.md ${OUTPUT_CONTENT_DIR}/*.md"` +for file in ${ALL_DOC_CONTENT_FILES} +do + + relFile=${file#*/*/} + echo "$(cat <> ${file} + +done + +# 3. 
All absolute links need to link directly to GitHub at the given commit. +perl -pi -e 's/]\(\//]\(https:\/\/github.com\/improbable-eng\/thanos\/tree\/'${COMMIT_SHA}'\/docs\//' ${ALL_DOC_CONTENT_FILES} + +# 4. All relative links need a ../ prefix. This is because Hugo is missing: https://github.com/gohugoio/hugo/pull/3934 +perl -pi -e 's/]\(\.\//]\(..\//' ${ALL_DOC_CONTENT_FILES} +perl -pi -e 's/]\((?!http)/]\(..\//' ${ALL_DOC_CONTENT_FILES} +perl -pi -e 's/src=\"(?!http)/src=\"..\//' ${ALL_DOC_CONTENT_FILES} \ No newline at end of file diff --git a/test/e2e/query_test.go b/test/e2e/query_test.go index d8b4fc2607..912e1d80bf 100644 --- a/test/e2e/query_test.go +++ b/test/e2e/query_test.go @@ -145,6 +145,7 @@ func testQuerySimple(t *testing.T, conf testConfig) { "__name__": "up", "instance": model.LabelValue("localhost:9100"), "job": "node", + "receive": "true", }, res[3].Metric) } @@ -201,6 +202,7 @@ func testQuerySimple(t *testing.T, conf testConfig) { "__name__": "up", "instance": model.LabelValue("localhost:9100"), "job": "node", + "receive": "true", }, res[2].Metric) } } diff --git a/test/e2e/spinup_test.go b/test/e2e/spinup_test.go index 34bd40f695..c45fb0be24 100644 --- a/test/e2e/spinup_test.go +++ b/test/e2e/spinup_test.go @@ -108,6 +108,7 @@ func scraper(i int, config string) cmdScheduleFunc { cmds = append(cmds, newCmdExec(exec.Command(testutil.PrometheusBinary(), "--config.file", promDir+"/prometheus.yml", "--storage.tsdb.path", promDir, + "--storage.tsdb.max-block-duration", "2h", "--log.level", "info", "--web.listen-address", promHTTP(i), ))) @@ -145,6 +146,7 @@ func receiver(i int, config string) cmdScheduleFunc { "--grpc-address", remoteWriteReceiveGRPC(i), "--http-address", remoteWriteReceiveMetricHTTP(i), "--remote-write.address", remoteWriteReceiveHTTP(i), + "--labels", "receive=\"true\"", "--tsdb.path", promDir, "--log.level", "debug"))), nil } diff --git a/test/e2e/store_gateway_test.go b/test/e2e/store_gateway_test.go index 8648dc5b3d..094db34f9a 100644 --- a/test/e2e/store_gateway_test.go +++ b/test/e2e/store_gateway_test.go @@ -68,10 +68,10 @@ func TestStoreGatewayQuery(t *testing.T) { extLset2 := labels.FromStrings("ext1", "value1", "replica", "2") now := time.Now() - id1, err := testutil.CreateBlock(dir, series, 10, timestamp.FromTime(now), timestamp.FromTime(now.Add(2*time.Hour)), extLset, 0) + id1, err := testutil.CreateBlock(ctx, dir, series, 10, timestamp.FromTime(now), timestamp.FromTime(now.Add(2*time.Hour)), extLset, 0) testutil.Ok(t, err) - id2, err := testutil.CreateBlock(dir, series, 10, timestamp.FromTime(now), timestamp.FromTime(now.Add(2*time.Hour)), extLset2, 0) + id2, err := testutil.CreateBlock(ctx, dir, series, 10, timestamp.FromTime(now), timestamp.FromTime(now.Add(2*time.Hour)), extLset2, 0) testutil.Ok(t, err) l := log.NewLogfmtLogger(os.Stdout) diff --git a/tutorials/kubernetes-helm/README.md b/tutorials/kubernetes-helm/README.md new file mode 100644 index 0000000000..d5b1d2d23b --- /dev/null +++ b/tutorials/kubernetes-helm/README.md @@ -0,0 +1,154 @@ +To deploy the Thanos sidecar along with Prometheus using the official Helm chart, put the values below into a `values.yaml` file, adjust the `--namespace` value as needed, and run the following command: + +``` +helm upgrade --version="8.6.0" --install --namespace="my-lovely-namespace" --values values.yaml prometheus-thanos-sidecar stable/prometheus +``` + +Note that you need to replace two placeholders in the values: `BUCKET_REPLACE_ME` and `CLUSTER_NAME`.
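For example, one quick way to substitute both placeholders before installing is a `sed` one-liner. This is only an illustrative sketch; the bucket and cluster names below are hypothetical, so adjust them to your environment:

```
# GNU sed, in-place: swap the hypothetical bucket and cluster names for your own.
sed -i \
  -e 's/BUCKET_REPLACE_ME/my-thanos-bucket/' \
  -e 's/CLUSTER_NAME/my-cluster/' \
  values.yaml
```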
Also adjust all the other values according to your infrastructure requirements. + +An example of the `values.yaml` file: +``` +rbac: + create: true + +alertmanager: + enabled: false + +pushgateway: + enabled: false + +nodeExporter: + enabled: false + +kubeStateMetrics: + enabled: false + +initChownData: + resources: + limits: + memory: 16Mi + cpu: 50m + requests: + memory: 16Mi + cpu: 50m + +server: + extraArgs: + log.level: debug + storage.tsdb.min-block-duration: 2h # Don't change this, see docs/components/sidecar.md + storage.tsdb.max-block-duration: 2h # Don't change this, see docs/components/sidecar.md + retention: 4h + service: + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + statefulSet: + enabled: true + podAnnotations: + prometheus.io/scrape: "true" + prometheus.io/port: "10902" + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - prometheus + - key: component + operator: In + values: + - server + topologyKey: "kubernetes.io/hostname" + sidecarContainers: + - name: thanos-sidecar + # Always use explicit image tags (release or master--sha) instead of ambiguous `latest` or `master`. + image: improbable/thanos:v0.3.2 + resources: + requests: + memory: "4Gi" + cpu: "2" + limits: + memory: "4Gi" + cpu: "2" + env: + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /etc/secret/sa + args: + - "sidecar" + - "--log.level=debug" + - "--tsdb.path=/data/" + - "--prometheus.url=http://127.0.0.1:9090" + - "--cluster.disable" + - "--objstore.config={type: GCS, config: {bucket: BUCKET_REPLACE_ME}}" + - "--reloader.config-file=/etc/prometheus-config/prometheus.yml" + - "--reloader.config-envsubst-file=/etc/prometheus-shared/prometheus.yml" + - "--reloader.rule-dir=/etc/prometheus-config/rules" + ports: + - name: sidecar-http + containerPort: 10902 + - name: grpc + containerPort: 10901 + - name: cluster + containerPort: 10900 + volumeMounts: + - name: storage-volume + mountPath: /data + - name: thanos-storage-secret + mountPath: /etc/secret + - name: config-volume + mountPath: /etc/prometheus-config + readOnly: false + - name: prometheus-config-shared + mountPath: /etc/prometheus-shared/ + readOnly: false + configPath: /etc/prometheus-shared/prometheus.yml + replicaCount: 2 + persistentVolume: + size: 100Gi + extraVolumes: + - name: prometheus-config-shared + emptyDir: {} + extraVolumeMounts: + - name: prometheus-config-shared + mountPath: /etc/prometheus-shared/ + resources: + limits: + cpu: 4 + memory: 20Gi + requests: + cpu: 4 + memory: 20Gi + global: + scrape_interval: 5s + scrape_timeout: 4s + external_labels: + prometheus_group: CLUSTER_NAME + prometheus_replica: '$(HOSTNAME)' + evaluation_interval: 5s + extraSecretMounts: + - name: thanos-storage-secret + mountPath: /etc/secret/ + subPath: sa + readOnly: false + secretName: thanos-storage-secret + +configmapReload: + image: + repository: gcr.io/google-containers/pause-amd64 # This image is changed to just pause since there is no option to disable the configmapReload container in the chart, and thanos-sidecar takes over this functionality.
So we don't need another reloader. + tag: 3.1 + resources: + limits: + cpu: 20m + memory: 20Mi + requests: + cpu: 20m + memory: 20Mi + + +serverFiles: + alerts: {} + rules: {} + +``` diff --git a/website/archetypes/docs.md b/website/archetypes/docs.md new file mode 100644 index 0000000000..ee74fd4179 --- /dev/null +++ b/website/archetypes/docs.md @@ -0,0 +1,4 @@ +--- +title: "{{ replace .Name "-" " " | title }}" +--- + diff --git a/website/data/sponsors.yml b/website/data/sponsors.yml new file mode 100644 index 0000000000..f99a7ed6e0 --- /dev/null +++ b/website/data/sponsors.yml @@ -0,0 +1,11 @@ +--- +sponsors: +- name: Monzo + url: https://www.monzo.com + logo: monzo.png +- name: Utility Warehouse + url: https://www.utilitywarehouse.co.uk + logo: utilitywarehouse.png +- name: Adform + url: https://site.adform.com + logo: Adform_logo_RGB.png diff --git a/website/hugo.yaml b/website/hugo.yaml new file mode 100644 index 0000000000..472134ab6f --- /dev/null +++ b/website/hugo.yaml @@ -0,0 +1,34 @@ +title: "Thanos - Highly available Prometheus setup with long term storage capabilities" +# This is controlled by makefile: baseURL: "https://thanos.io" +languageCode: "en-us" + +enableGitInfo: true +enableEmoji: true +pygmentsCodeFences: true +pygmentsUseClasses: true + +# We use a preprocessing script, so live reload would not work as expected anyway. +disableLiveReload: true +googleAnalytics: "UA-137374921-1" + +# So a /thanos/docs/b link from /thanos/docs/some/a will be rewritten as ../b +canonifyURLs: true +relativeURLs: true + +# NOTE: Hugo is expected to run from `website` +contentDir: "docs-pre-processed" +archetypeDir: "archetypes" +layoutDir: "layouts" +publishDir: "public" +staticDir: "static" + +permalinks: + contributing: "/contributing/:filename.md" + components: "/components/:filename.md" + proposals: "/proposals/:filename.md" + +params: + SlackInvite: "https://join.slack.com/t/improbable-eng/shared_invite/enQtMzQ1ODcyMzQ5MjM4LWY5ZWZmNGM2ODc5MmViNmQ3ZTA3ZTY3NzQwOTBlMTkzZmIxZTIxODk0OWU3YjZhNWVlNDU3MDlkZGViZjhkMjc" + GithubUser: "improbable-eng" + GithubProject: "thanos" + TwitterHandle: "ThanosMetrics" \ No newline at end of file diff --git a/website/layouts/_default/baseof.html b/website/layouts/_default/baseof.html new file mode 100644 index 0000000000..9fd91250ab --- /dev/null +++ b/website/layouts/_default/baseof.html @@ -0,0 +1,93 @@ + + + + + + + + + + + + + {{ block "title" . }}{{ .Site.Title }}{{ end }} + + + + + + + + + {{ block "main" . }}{{ end }} + + + + + {{ if eq ( getenv "HUGO_ENV" ) "production" }} + {{ template "_internal/google_analytics_async.html" . }} + {{ end }} + + \ No newline at end of file diff --git a/website/layouts/_default/single.html b/website/layouts/_default/single.html new file mode 100644 index 0000000000..d68c45c348 --- /dev/null +++ b/website/layouts/_default/single.html @@ -0,0 +1,14 @@ +{{ define "main" }} +
+
+
+ {{ partial "_default/sidemenu.html" . }} +
+
+ {{ $content := replace .Content "" "
" }} + {{ $content = $content | replaceRE "()" `${1} # ${3}` }} + {{ $content | safeHTML}} + + + +{{ end }} \ No newline at end of file diff --git a/website/layouts/blog/list.html b/website/layouts/blog/list.html new file mode 100644 index 0000000000..697f79a429 --- /dev/null +++ b/website/layouts/blog/list.html @@ -0,0 +1,17 @@ +{{ define "main" }} +
+
+
+ {{ range .Paginator.Pages }} +
+

{{ .Title }}

+

Written by {{ .Params.Author }} and published on {{ .Date.Format "Jan 02, 2006" }}

+
+ {{ .Content }} +
+ {{ end}} + {{ template "_internal/pagination.html" . }} +
+
+
+{{ end }} \ No newline at end of file diff --git a/website/layouts/blog/single.html b/website/layouts/blog/single.html new file mode 100644 index 0000000000..52cb30adf8 --- /dev/null +++ b/website/layouts/blog/single.html @@ -0,0 +1,14 @@ +{{ define "main" }} +
+
+
+
+

{{ .Title }}

+

Written by {{ .Params.Author }} and published on {{ .Date.Format "Jan 02, 2006" }}

+
+ {{ .Content }} +
+
+
+
+{{ end }} \ No newline at end of file diff --git a/website/layouts/index.html b/website/layouts/index.html new file mode 100644 index 0000000000..3fdf30b483 --- /dev/null +++ b/website/layouts/index.html @@ -0,0 +1,105 @@ +{{ define "main" }} +
+
+
+ Thanos Logo +
+
+
+
+

Highly available Prometheus setup with long term storage capabilities.

+ +
+
+
+
+
+
+ +

Global Query View

+

Scale your Prometheus setup by enabling querying of your Prometheus metrics across multiple Prometheus servers and clusters.

+
+
+ +

Unlimited Retention

+

Extend the system with the object storage of your choice to store your metrics for an unlimited time. Supports GCP, S3, Azure, Swift and Tencent COS.

+
+
+ +

Prometheus Compatible

+

Use the same tools you love, such as Grafana or others that support the Prometheus Query API.

+
+
+ +

Downsampling & Compaction

+

Downsample historical data for a massive query speedup when querying large time ranges, or configure complex retention policies.

+
+
+
+
+

Founded By

+
+
+
+ Improbable +
+
+
+

Used By

+
+ {{ range $sponsor := $.Site.Data.sponsors.sponsors }} + {{ if $sponsor.logo }} +
+
+ {{ $sponsor.name }} +
+
+ {{ end }} + {{ end }} +
+ +
+
+
+
+
+
+

Join the community!

+

Join users and companies that are using Thanos in production.

+ +
+
+
+{{ end }} diff --git a/website/layouts/partials/_default/sidemenu.html b/website/layouts/partials/_default/sidemenu.html new file mode 100644 index 0000000000..63be2707d1 --- /dev/null +++ b/website/layouts/partials/_default/sidemenu.html @@ -0,0 +1,32 @@ +{{ $currentPage := . }} + + + + \ No newline at end of file diff --git a/website/layouts/proposal/single.html b/website/layouts/proposal/single.html new file mode 100644 index 0000000000..1a86321c6d --- /dev/null +++ b/website/layouts/proposal/single.html @@ -0,0 +1,37 @@ +{{ define "main" }} +
+
+
+ {{ partial "_default/sidemenu.html" . }} +
+
+

{{ .Title }}

+
+
Status
+ {{ if eq .Params.Status "accepted"}} +
{{ .Params.status }}
+ {{ end }} + {{ if eq .Params.Status "complete"}} +
{{ .Params.status }}
+ {{ end }} + {{ if eq .Params.Status "rejected"}} +
{{ .Params.status }}
+ {{ end }} + {{ if eq .Params.Status "draft"}} +
{{ .Params.status }}
+ {{ end }} + {{ if eq .Params.Status "in-review"}} +
{{ .Params.status }}
+ {{ end }} +
Owner
+
@{{ .Params.owner }}
+ {{ if .Params.issue }} +
Issue
+
@{{ .Params.issue}}
+ {{ end }} +
+ {{ replace .Content "
" "
" | safeHTML }} + + + +{{ end }} \ No newline at end of file diff --git a/website/static/CNAME b/website/static/CNAME new file mode 100644 index 0000000000..ccca43b9c9 --- /dev/null +++ b/website/static/CNAME @@ -0,0 +1 @@ +thanos.io \ No newline at end of file diff --git a/website/static/Thanos-logo_full.svg b/website/static/Thanos-logo_full.svg new file mode 100644 index 0000000000..7941cfe32e --- /dev/null +++ b/website/static/Thanos-logo_full.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/website/static/icon-dark.png b/website/static/icon-dark.png new file mode 100644 index 0000000000..6008bc0d4f Binary files /dev/null and b/website/static/icon-dark.png differ diff --git a/website/static/icon-light.png b/website/static/icon-light.png new file mode 100644 index 0000000000..288e002d74 Binary files /dev/null and b/website/static/icon-light.png differ diff --git a/website/static/logos/Adform_logo_RGB.png b/website/static/logos/Adform_logo_RGB.png new file mode 100644 index 0000000000..78ceef9244 Binary files /dev/null and b/website/static/logos/Adform_logo_RGB.png differ diff --git a/website/static/logos/improbable.png b/website/static/logos/improbable.png new file mode 100644 index 0000000000..27db9c4416 Binary files /dev/null and b/website/static/logos/improbable.png differ diff --git a/website/static/logos/monzo.png b/website/static/logos/monzo.png new file mode 100644 index 0000000000..93d3264f71 Binary files /dev/null and b/website/static/logos/monzo.png differ diff --git a/website/static/logos/utilitywarehouse.png b/website/static/logos/utilitywarehouse.png new file mode 100644 index 0000000000..95df50191c Binary files /dev/null and b/website/static/logos/utilitywarehouse.png differ diff --git a/website/static/main.css b/website/static/main.css new file mode 100644 index 0000000000..8eacafa3cb --- /dev/null +++ b/website/static/main.css @@ -0,0 +1,72 @@ +.bg-purple { + background-color: #6D41FF; +} + +.color-purple { + color: #6D41FF; +} + +.navbar-brand img { + height: 32px; + margin-right: 10px; + max-width: 100%; +} + +.navbar-dark .navbar-nav .nav-link { + color: #fff; +} + +.nav-link:hover { + color: rgba(255,255,255,.5) +} + +.btn-outline-secondary { + color: #6D41FF; + border-color: #6D41FF; +} + +.btn-outline-secondary:hover, .btn-outline-secondary:active { + color: #fff; + background-color: #6D41FF !important; + border-color: #6D41FF !important; +} + +a { + color: #6D41FF; +} + +pre { + padding: 1rem; +} + +.list-group-item-thanos { + color: #6D41FF; + border: 0; + padding: 0.5rem; + background-color: transparent; + text-indent: 1rem; +} + +.list-group-item-thanos:hover { + color: #6D41FF; +} + +.img-sponsor { + padding: 25px; + background-color: #fff; + box-shadow: 0 2px 4px rgba(0,0,0,.1); + border: 1px solid #dee2e6; + margin: 15px 0 15px; + border-radius: .25rem; + display: flex; + align-items: center; + justify-content: center; + height: 100px; +} + +.img-sponsor img { + width: 100%; +} + +.header-anchor { font-size: 100%; visibility: hidden;} +h1:hover a, h2:hover a, h3:hover a, h4:hover a { visibility: visible} \ No newline at end of file diff --git a/website/static/syntax.css b/website/static/syntax.css new file mode 100644 index 0000000000..acbddee913 --- /dev/null +++ b/website/static/syntax.css @@ -0,0 +1,68 @@ +/* Background */ .chroma { background-color: #f8f8f8 } +/* Error */ .chroma .err { } +/* LineTableTD */ .chroma .lntd { vertical-align: top; padding: 0; margin: 0; border: 0; } +/* LineTable */ .chroma .lntable { border-spacing: 0; padding: 
0; margin: 0; border: 0; width: auto; overflow: auto; display: block; }
/* LineHighlight */ .chroma .hl { display: block; width: 100%; background-color: #ffffcc }
/* LineNumbersTable */ .chroma .lnt { margin-right: 0.4em; padding: 0 0.4em 0 0.4em; }
/* LineNumbers */ .chroma .ln { margin-right: 0.4em; padding: 0 0.4em 0 0.4em; }
/* Keyword */ .chroma .k { color: #aa22ff; font-weight: bold }
/* KeywordConstant */ .chroma .kc { color: #aa22ff; font-weight: bold }
/* KeywordDeclaration */ .chroma .kd { color: #aa22ff; font-weight: bold }
/* KeywordNamespace */ .chroma .kn { color: #aa22ff; font-weight: bold }
/* KeywordPseudo */ .chroma .kp { color: #aa22ff }
/* KeywordReserved */ .chroma .kr { color: #aa22ff; font-weight: bold }
/* KeywordType */ .chroma .kt { color: #00bb00; font-weight: bold }
/* NameAttribute */ .chroma .na { color: #bb4444 }
/* NameBuiltin */ .chroma .nb { color: #aa22ff }
/* NameClass */ .chroma .nc { color: #0000ff }
/* NameConstant */ .chroma .no { color: #880000 }
/* NameDecorator */ .chroma .nd { color: #aa22ff }
/* NameEntity */ .chroma .ni { color: #999999; font-weight: bold }
/* NameException */ .chroma .ne { color: #d2413a; font-weight: bold }
/* NameFunction */ .chroma .nf { color: #00a000 }
/* NameLabel */ .chroma .nl { color: #a0a000 }
/* NameNamespace */ .chroma .nn { color: #0000ff; font-weight: bold }
/* NameTag */ .chroma .nt { color: #008000; font-weight: bold }
/* NameVariable */ .chroma .nv { color: #b8860b }
/* LiteralString */ .chroma .s { color: #bb4444 }
/* LiteralStringAffix */ .chroma .sa { color: #bb4444 }
/* LiteralStringBacktick */ .chroma .sb { color: #bb4444 }
/* LiteralStringChar */ .chroma .sc { color: #bb4444 }
/* LiteralStringDelimiter */ .chroma .dl { color: #bb4444 }
/* LiteralStringDoc */ .chroma .sd { color: #bb4444; font-style: italic }
/* LiteralStringDouble */ .chroma .s2 { color: #bb4444 }
/* LiteralStringEscape */ .chroma .se { color: #bb6622; font-weight: bold }
/* LiteralStringHeredoc */ .chroma .sh { color: #bb4444 }
/* LiteralStringInterpol */ .chroma .si { color: #bb6688; font-weight: bold }
/* LiteralStringOther */ .chroma .sx { color: #008000 }
/* LiteralStringRegex */ .chroma .sr { color: #bb6688 }
/* LiteralStringSingle */ .chroma .s1 { color: #bb4444 }
/* LiteralStringSymbol */ .chroma .ss { color: #b8860b }
/* LiteralNumber */ .chroma .m { color: #666666 }
/* LiteralNumberBin */ .chroma .mb { color: #666666 }
/* LiteralNumberFloat */ .chroma .mf { color: #666666 }
/* LiteralNumberHex */ .chroma .mh { color: #666666 }
/* LiteralNumberInteger */ .chroma .mi { color: #666666 }
/* LiteralNumberIntegerLong */ .chroma .il { color: #666666 }
/* LiteralNumberOct */ .chroma .mo { color: #666666 }
/* Operator */ .chroma .o { color: #666666 }
/* OperatorWord */ .chroma .ow { color: #aa22ff; font-weight: bold }
/* Comment */ .chroma .c { color: #008800; font-style: italic }
/* CommentHashbang */ .chroma .ch { color: #008800; font-style: italic }
/* CommentMultiline */ .chroma .cm { color: #008800; font-style: italic }
/* CommentSingle */ .chroma .c1 { color: #008800; font-style: italic }
/* CommentSpecial */ .chroma .cs { color: #008800; font-weight: bold }
/* CommentPreproc */ .chroma .cp { color: #008800 }
/* CommentPreprocFile */ .chroma .cpf { color: #008800 }
/* GenericDeleted */ .chroma .gd { color: #a00000 }
/* GenericEmph */ .chroma .ge { font-style: italic }
/* GenericError */ .chroma .gr { color: #ff0000 }
/* GenericHeading */ .chroma .gh { color: #000080; font-weight: bold }
/* GenericInserted */ .chroma .gi { color: #00a000 }
/* GenericOutput */ .chroma .go { color: #888888 }
/* GenericPrompt */ .chroma .gp { color: #000080; font-weight: bold }
/* GenericStrong */ .chroma .gs { font-weight: bold }
/* GenericSubheading */ .chroma .gu { color: #800080; font-weight: bold }
/* GenericTraceback */ .chroma .gt { color: #0044dd }
/* GenericUnderline */ .chroma .gl { text-decoration: underline }
/* TextWhitespace */ .chroma .w { color: #bbbbbb }