diff --git a/CHANGELOG.md b/CHANGELOG.md index 25ffcf972c..825efa655f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,12 +13,14 @@ We use *breaking* word for marking changes that are not backward compatible (rel ## Added - [#1538](https://github.com/thanos-io/thanos/pull/1538) Added `/-/ready` and `/-/healthy` endpoints to Thanos Rule. --[1533](https://github.com/thanos-io/thanos/pull/1533) Thanos inspect now supports the timeout flag. +- [#1537](https://github.com/thanos-io/thanos/pull/1537) Added `/-/ready` and `/-/healthy` endpoints to Thanos Receive. +- [#1534](https://github.com/thanos-io/thanos/pull/1534) Added `/-/ready` endpoint to Thanos Query. +- [#1533](https://github.com/thanos-io/thanos/pull/1533) Thanos inspect now supports the timeout flag. ### Fixed --[#1525](https://github.com/thanos-io/thanos/pull/1525) Thanos now deletes block's file in correct order allowing to detect partial blocks without problems. --[#1505](https://github.com/thanos-io/thanos/pull/1505) Thanos store now removes invalid local cache blocks. +- [#1525](https://github.com/thanos-io/thanos/pull/1525) Thanos now deletes block's file in correct order allowing to detect partial blocks without problems. +- [#1505](https://github.com/thanos-io/thanos/pull/1505) Thanos store now removes invalid local cache blocks. ## v0.7.0 - 2019.09.02 diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 5b2e277fc6..01347caba2 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -168,9 +168,9 @@ func runCompact( downsampleMetrics := newDownsampleMetrics(reg) statusProber := prober.NewProber(component, logger, prometheus.WrapRegistererWithPrefix("thanos_", reg)) - // Initiate default HTTP listener providing metrics endpoint and readiness/liveness probes. + // Initiate HTTP listener providing metrics endpoint and readiness/liveness probes. if err := scheduleHTTPServer(g, logger, reg, statusProber, httpBindAddr, nil, component); err != nil { - return errors.Wrap(err, "create default HTTP server with readiness prober") + return errors.Wrap(err, "schedule HTTP server with probes") } confContentYaml, err := objStoreConfig.Content() diff --git a/cmd/thanos/main.go b/cmd/thanos/main.go index a5143be371..a7a583915f 100644 --- a/cmd/thanos/main.go +++ b/cmd/thanos/main.go @@ -80,7 +80,7 @@ func main() { registerCompact(cmds, app) registerBucket(cmds, app, "bucket") registerDownsample(cmds, app, "downsample") - registerReceive(cmds, app, "receive") + registerReceive(cmds, app) registerChecks(cmds, app, "check") cmd, err := app.Parse(os.Args[1:]) diff --git a/cmd/thanos/query.go b/cmd/thanos/query.go index 515c57de61..594ac90de3 100644 --- a/cmd/thanos/query.go +++ b/cmd/thanos/query.go @@ -414,9 +414,9 @@ func runQuery( api.Register(router.WithPrefix(path.Join(webRoutePrefix, "/api/v1")), tracer, logger, ins) - // Initiate default HTTP listener providing metrics endpoint and readiness/liveness probes. + // Initiate HTTP listener providing metrics endpoint and readiness/liveness probes. if err := scheduleHTTPServer(g, logger, reg, statusProber, httpBindAddr, router, comp); err != nil { - return errors.Wrap(err, "create default HTTP server with readiness prober") + return errors.Wrap(err, "schedule HTTP server with probes") } } // Start query (proxy) gRPC StoreAPI. diff --git a/cmd/thanos/receive.go b/cmd/thanos/receive.go index ed6115425c..72dd8752b0 100644 --- a/cmd/thanos/receive.go +++ b/cmd/thanos/receive.go @@ -20,6 +20,7 @@ import ( "github.com/thanos-io/thanos/pkg/block/metadata" "github.com/thanos-io/thanos/pkg/component" "github.com/thanos-io/thanos/pkg/objstore/client" + "github.com/thanos-io/thanos/pkg/prober" "github.com/thanos-io/thanos/pkg/receive" "github.com/thanos-io/thanos/pkg/runutil" "github.com/thanos-io/thanos/pkg/shipper" @@ -28,11 +29,12 @@ import ( kingpin "gopkg.in/alecthomas/kingpin.v2" ) -func registerReceive(m map[string]setupFunc, app *kingpin.Application, name string) { - cmd := app.Command(name, "Accept Prometheus remote write API requests and write to local tsdb (EXPERIMENTAL, this may change drastically without notice)") +func registerReceive(m map[string]setupFunc, app *kingpin.Application) { + comp := component.Receive + cmd := app.Command(comp.String(), "Accept Prometheus remote write API requests and write to local tsdb (EXPERIMENTAL, this may change drastically without notice)") grpcBindAddr, cert, key, clientCA := regGRPCFlags(cmd) - httpMetricsBindAddr := regHTTPAddrFlag(cmd) + httpBindAddr := regHTTPAddrFlag(cmd) remoteWriteAddress := cmd.Flag("remote-write.address", "Address to listen on for remote write requests."). Default("0.0.0.0:19291").String() @@ -62,7 +64,7 @@ func registerReceive(m map[string]setupFunc, app *kingpin.Application, name stri tsdbBlockDuration := modelDuration(cmd.Flag("tsdb.block-duration", "Duration for local TSDB blocks").Default("2h").Hidden()) - m[name] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ bool) error { + m[comp.String()] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ bool) error { lset, err := parseFlagLabels(*labelStrs) if err != nil { return errors.Wrap(err, "parse labels") @@ -97,7 +99,7 @@ func registerReceive(m map[string]setupFunc, app *kingpin.Application, name stri *cert, *key, *clientCA, - *httpMetricsBindAddr, + *httpBindAddr, *remoteWriteAddress, *dataDir, objStoreConfig, @@ -109,6 +111,7 @@ func registerReceive(m map[string]setupFunc, app *kingpin.Application, name stri *replicaHeader, *replicationFactor, *tsdbBlockDuration, + comp, ) } } @@ -122,7 +125,7 @@ func runReceive( cert string, key string, clientCA string, - httpMetricsBindAddr string, + httpBindAddr string, remoteWriteAddress string, dataDir string, objStoreConfig *pathOrContent, @@ -134,6 +137,7 @@ func runReceive( replicaHeader string, replicationFactor uint64, tsdbBlockDuration model.Duration, + comp component.Component, ) error { logger = log.With(logger, "component", "receive") level.Warn(logger).Log("msg", "setting up receive; the Thanos receive component is EXPERIMENTAL, it may break significantly without notice") @@ -159,6 +163,8 @@ func runReceive( ReplicationFactor: replicationFactor, }) + statusProber := prober.NewProber(comp, logger, prometheus.WrapRegistererWithPrefix("thanos_", reg)) + // Start all components while we wait for TSDB to open but only load // initial config and mark ourselves as ready after it completed. dbOpen := make(chan struct{}) @@ -198,6 +204,7 @@ func runReceive( ) } + hashringReady := make(chan struct{}) level.Debug(logger).Log("msg", "setting up hashring") { updates := make(chan receive.Hashring) @@ -227,15 +234,20 @@ func runReceive( func() error { select { case h := <-updates: + close(hashringReady) webHandler.Hashring(h) + statusProber.SetReady() case <-cancel: + close(hashringReady) return nil } select { // If any new hashring is received, then mark the handler as unready, but keep it alive. case <-updates: + msg := "hashring has changed; server is not ready to receive web requests." webHandler.Hashring(nil) - level.Info(logger).Log("msg", "hashring has changed; server is not ready to receive web requests.") + statusProber.SetNotReady(errors.New(msg)) + level.Info(logger).Log("msg", msg) case <-cancel: return nil } @@ -248,9 +260,10 @@ func runReceive( ) } - level.Debug(logger).Log("msg", "setting up metric http listen-group") - if err := metricHTTPListenGroup(g, logger, reg, httpMetricsBindAddr); err != nil { - return err + level.Debug(logger).Log("msg", "setting up http server") + // Initiate HTTP listener providing metrics endpoint and readiness/liveness probes. + if err := scheduleHTTPServer(g, logger, reg, statusProber, httpBindAddr, nil, comp); err != nil { + return errors.Wrap(err, "schedule HTTP server with probes") } level.Debug(logger).Log("msg", "setting up grpc server") @@ -277,6 +290,8 @@ func runReceive( } s := newStoreGRPCServer(logger, reg, tracer, tsdbStore, opts) + // Wait hashring to be ready before start serving metrics + <-hashringReady level.Info(logger).Log("msg", "listening for StoreAPI gRPC", "address", grpcBindAddr) return errors.Wrap(s.Serve(l), "serve gRPC") }, func(error) { @@ -343,6 +358,5 @@ func runReceive( } level.Info(logger).Log("msg", "starting receiver") - return nil } diff --git a/cmd/thanos/sidecar.go b/cmd/thanos/sidecar.go index 68afd666e4..aab6be8e55 100644 --- a/cmd/thanos/sidecar.go +++ b/cmd/thanos/sidecar.go @@ -120,9 +120,9 @@ func runSidecar( } statusProber := prober.NewProber(comp, logger, prometheus.WrapRegistererWithPrefix("thanos_", reg)) - // Initiate default HTTP listener providing metrics endpoint and readiness/liveness probes. + // Initiate HTTP listener providing metrics endpoint and readiness/liveness probes. if err := scheduleHTTPServer(g, logger, reg, statusProber, httpBindAddr, nil, comp); err != nil { - return errors.Wrap(err, "create default HTTP server with readiness prober") + return errors.Wrap(err, "schedule HTTP server with probes") } // Setup all the concurrent groups.