diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e2a7ffd41..68d4985ad8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#4340](https://github.com/thanos-io/thanos/pull/4340) UI: now displays the duration and all annotations of an alert in the alerts page. - [#4348](https://github.com/thanos-io/thanos/pull/4348) Fixed parsing of the port in the log middleware. - [#4417](https://github.com/thanos-io/thanos/pull/4417) UI: fixed the night mode in Bucket UI. +- [#4442](https://github.com/thanos-io/thanos/pull/4442) Ruler: fix SIGHUP reload signal not working. ### Changed diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index ff5ca1310c..1872e6e935 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -80,7 +80,6 @@ type ruleConfig struct { ruleFiles []string objStoreConfig *extflag.PathOrContent dataDir string - reloadSignal <-chan struct{} lset labels.Labels } @@ -195,6 +194,7 @@ func registerRule(app *extkingpin.App) { tracer, comp, *conf, + reload, getFlagsMap(cmd.Flags()), httpLogOpts, grpcLogOpts, @@ -257,6 +257,7 @@ func runRule( tracer opentracing.Tracer, comp component.Component, conf ruleConfig, + reloadSignal <-chan struct{}, flagsMap map[string]string, httpLogOpts []logging.Option, grpcLogOpts []grpc_logging.Option, @@ -483,7 +484,7 @@ func runRule( } for { select { - case <-conf.reloadSignal: + case <-reloadSignal: if err := reloadRules(logger, conf.ruleFiles, ruleMgr, conf.evalInterval, metrics); err != nil { level.Error(logger).Log("msg", "reload rules by sighup failed", "err", err) } diff --git a/test/e2e/rule_test.go b/test/e2e/rule_test.go index e2ee815132..3e7f6a2069 100644 --- a/test/e2e/rule_test.go +++ b/test/e2e/rule_test.go @@ -71,9 +71,39 @@ groups: severity: page annotations: summary: "I always complain and I have been loaded via /-/reload." +` + testAlertRuleAddedLaterSignal = ` +groups: +- name: example + interval: 1s + partial_response_strategy: "WARN" + rules: + - alert: TestAlert_HasBeenLoadedViaWebHandler + # It must be based on actual metric, otherwise call to StoreAPI would be not involved. + expr: absent(some_metric) + labels: + severity: page + annotations: + summary: "I always complain and I have been loaded via sighup signal." +- name: example2 + interval: 1s + partial_response_strategy: "WARN" + rules: + - alert: TestAlert_HasBeenLoadedViaWebHandler + # It must be based on actual metric, otherwise call to StoreAPI would be not involved. + expr: absent(some_metric) + labels: + severity: page + annotations: + summary: "I always complain and I have been loaded via sighup signal." ` ) +type rulesResp struct { + Status string + Data *rulespb.RuleGroups +} + func createRuleFile(t *testing.T, path, content string) { t.Helper() err := ioutil.WriteFile(path, []byte(content), 0666) @@ -97,6 +127,30 @@ func reloadRulesHTTP(t *testing.T, ctx context.Context, endpoint string) { testutil.Equals(t, 200, resp.StatusCode) } +func reloadRulesSignal(t *testing.T, r *e2ethanos.Service) { + c := e2e.NewCommand("kill", "-1", "1") + _, _, err := r.Exec(c) + testutil.Ok(t, err) +} + +func checkReloadSuccessful(t *testing.T, ctx context.Context, endpoint string, expectedRulegroupCount int) { + req, err := http.NewRequestWithContext(ctx, "GET", "http://"+endpoint+"/api/v1/rules", ioutil.NopCloser(bytes.NewReader(nil))) + testutil.Ok(t, err) + resp, err := http.DefaultClient.Do(req) + testutil.Ok(t, err) + testutil.Equals(t, 200, resp.StatusCode) + + body, _ := ioutil.ReadAll(resp.Body) + testutil.Ok(t, resp.Body.Close()) + + var data = rulesResp{} + + testutil.Ok(t, json.Unmarshal(body, &data)) + testutil.Equals(t, "success", data.Status) + + testutil.Assert(t, len(data.Data.Groups) == expectedRulegroupCount, fmt.Sprintf("expected there to be %d rule groups", expectedRulegroupCount)) +} + func rulegroupCorrectData(t *testing.T, ctx context.Context, endpoint string) { req, err := http.NewRequestWithContext(ctx, "GET", "http://"+endpoint+"/api/v1/rules", ioutil.NopCloser(bytes.NewReader(nil))) testutil.Ok(t, err) @@ -108,10 +162,7 @@ func rulegroupCorrectData(t *testing.T, ctx context.Context, endpoint string) { body, err := ioutil.ReadAll(resp.Body) testutil.Ok(t, err) - var data struct { - Status string - Data *rulespb.RuleGroups - } + var data = rulesResp{} testutil.Ok(t, json.Unmarshal(body, &data)) testutil.Equals(t, "success", data.Status) @@ -317,12 +368,18 @@ func TestRule(t *testing.T) { rulegroupCorrectData(t, ctx, r.HTTPEndpoint()) }) - t.Run("reload works", func(t *testing.T) { - // Add a new rule via /-/reload. - // TODO(GiedriusS): add a test for reloading via SIGHUP. Need to extend e2e framework to expose PIDs. + t.Run("signal reload works", func(t *testing.T) { + // Add a new rule via sending sighup + createRuleFile(t, fmt.Sprintf("%s/newrule.yaml", rulesPath), testAlertRuleAddedLaterSignal) + reloadRulesSignal(t, r) + checkReloadSuccessful(t, ctx, r.HTTPEndpoint(), 4) + }) + t.Run("http reload works", func(t *testing.T) { + // Add a new rule via /-/reload. createRuleFile(t, fmt.Sprintf("%s/newrule.yaml", rulesPath), testAlertRuleAddedLaterWebHandler) reloadRulesHTTP(t, ctx, r.HTTPEndpoint()) + checkReloadSuccessful(t, ctx, r.HTTPEndpoint(), 3) }) queryAndAssertSeries(t, ctx, q.HTTPEndpoint(), "ALERTS", promclient.QueryOptions{