Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/prometheus rules missing #1073

Closed
wants to merge 13 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion pkg/cluster/embed/autogen_pkger.go

Large diffs are not rendered by default.

19 changes: 12 additions & 7 deletions pkg/cluster/spec/grafana.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,16 +208,11 @@ func (i *GrafanaInstance) InitConfig(

func (i *GrafanaInstance) initDashboards(ctx context.Context, e ctxt.Executor, spec GrafanaSpec, paths meta.DirPaths, clusterName string) error {
dashboardsDir := filepath.Join(paths.Deploy, "dashboards")
if spec.DashboardDir != "" {
return i.TransferLocalConfigDir(ctx, e, spec.DashboardDir, dashboardsDir, func(name string) bool {
return strings.HasSuffix(name, ".json")
})
}

cmds := []string{
"mkdir -p %[1]s",
`find %[1]s -maxdepth 1 -type f -name "*.json" -delete`,
"cp %[2]s/bin/*.json %[1]s",
`find %[2]s/bin -maxdepth 1 -type f -name "*.json" -exec cp {} %[1]s \;`,
}
_, stderr, err := e.Execute(ctx, fmt.Sprintf(strings.Join(cmds, " && "), dashboardsDir, paths.Deploy), false)
if err != nil {
Expand All @@ -238,6 +233,16 @@ func (i *GrafanaInstance) initDashboards(ctx context.Context, e ctxt.Executor, s
}
}

// The original Prometheus-v4.0.x.tar.gz contained some useful dashboards maintained by PingCAP
// These dashboards are sufficient for normal use case,
// if you want to specify other dashboards, use the `DashboardDir`.
// WARNING: files from DashboardDir will overwrite bundled dashboards with the same name.
if spec.DashboardDir != "" {
return i.TransferLocalConfigDir(ctx, e, spec.DashboardDir, dashboardsDir, func(name string) bool {
return strings.HasSuffix(name, ".json")
})
}

return nil
}

Expand Down Expand Up @@ -275,7 +280,7 @@ func (i *GrafanaInstance) installDashboards(ctx context.Context, e ctxt.Executor
cmds := []string{
"mkdir -p %[1]s",
`find %[1]s -maxdepth 1 -type f -name "*.json" -delete`,
"cp %[2]s/dm-master/scripts/*.json %[1]s",
`find %[2]s/dm-master/scripts -type f -name "*.json" -exec cp {} %[1]s \;`,
"rm -rf %[2]s",
}
_, stderr, err = e.Execute(ctx, fmt.Sprintf(strings.Join(cmds, " && "), targetDir, tmp), false)
Expand Down
9 changes: 8 additions & 1 deletion pkg/cluster/spec/grafana_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,16 @@ func TestLocalDashboards(t *testing.T) {
deployDir, err := ioutil.TempDir("", "tiup-*")
assert.Nil(t, err)
defer os.RemoveAll(deployDir)
// the dashboard json files are under the bin dir,
// which is needed to copy into the dashboard dir
err = os.MkdirAll(path.Join(deployDir, "bin"), 0755)
assert.Nil(t, err)
localDir, err := filepath.Abs("./testdata/dashboards")
assert.Nil(t, err)

err = ioutil.WriteFile(path.Join(deployDir, "bin", "dummy.json"), []byte(`{"dummy": "dummy"}`), 0644)
assert.Nil(t, err)

topo := new(Specification)
topo.Grafanas = append(topo.Grafanas, GrafanaSpec{
Host: "127.0.0.1",
Expand All @@ -60,7 +67,7 @@ func TestLocalDashboards(t *testing.T) {
err = grafanaInstance.initDashboards(ctx, e, topo.Grafanas[0], meta.DirPaths{Deploy: deployDir}, clusterName)
assert.Nil(t, err)

assert.FileExists(t, path.Join(deployDir, "dashboards", "tidb.json"))
assert.FileExists(t, path.Join(deployDir, "dashboards", "dummy.json"))
fs, err := ioutil.ReadDir(localDir)
assert.Nil(t, err)
for _, f := range fs {
Expand Down
21 changes: 13 additions & 8 deletions pkg/cluster/spec/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ func (i *MonitorInstance) installRules(ctx context.Context, e ctxt.Executor, dep
// Rebuild the rule set: ensure the target dir (%[1]s) exists, drop stale rule
// files from it, copy the freshly extracted ones out of the temp dir (%[2]s),
// then remove the temp dir.
cmds := []string{
	"mkdir -p %[1]s",
	`find %[1]s -type f -name "*.rules.yml" -delete`,
	// "{}" is the placeholder find replaces with each matched file. Without it
	// cp receives only the destination operand and fails, which aborts the
	// whole "&&" chain. find is used instead of a shell glob because an
	// unmatched glob is passed to cp literally and makes it fail as well.
	`find %[2]s/dm-master/conf -type f -name "*.rules.yml" -exec cp {} %[1]s \;`,
	"rm -rf %[2]s",
}
_, stderr, err = e.Execute(ctx, fmt.Sprintf(strings.Join(cmds, " && "), targetDir, tmp), false)
Expand All @@ -326,23 +326,28 @@ func (i *MonitorInstance) installRules(ctx context.Context, e ctxt.Executor, dep
}

// initRules populates <deploy>/conf with Prometheus rule files.
//
// It first deletes every existing *.rules.yml under <deploy>/conf and copies
// the rule files found directly under <deploy>/bin/prometheus into it, so the
// step can be re-run safely. If spec.RuleDir is set, the local *.rules.yml
// files from that directory are transferred afterwards, on top of the bundled
// ones.
//
// It returns the executor error annotated with the captured stderr when the
// remote command fails, or the result of TransferLocalConfigDir when a
// RuleDir is configured.
func (i *MonitorInstance) initRules(ctx context.Context, e ctxt.Executor, spec PrometheusSpec, paths meta.DirPaths) error {
	// To make this step idempotent, we need to clean up old rules first.
	cmds := []string{
		"mkdir -p %[1]s/conf",
		`find %[1]s/conf -type f -name "*.rules.yml" -delete`,
		// find -exec rather than `cp *.rules.yml`: an unmatched shell glob is
		// handed to cp literally and would fail the whole "&&" chain.
		`find %[1]s/bin/prometheus -maxdepth 1 -type f -name "*.rules.yml" -exec cp {} %[1]s/conf/ \;`,
	}
	_, stderr, err := e.Execute(ctx, fmt.Sprintf(strings.Join(cmds, " && "), paths.Deploy), false)
	if err != nil {
		return errors.Annotatef(err, "stderr: %s", string(stderr))
	}

	// The original Prometheus-v4.0.x.tar.gz contained a rule file for each
	// component maintained by PingCAP, such as tidb.rules.yml,
	// tiflash.rules.yml, ...
	// These rule files are sufficient for the normal use case; to supply
	// other rules, set `RuleDir`.
	// WARNING: a file from RuleDir overwrites a bundled file with the same name.
	//
	// This must run after the bundled rules are copied: handling RuleDir
	// first (and returning) would leave conf without the bundled rules.
	if spec.RuleDir != "" {
		return i.TransferLocalConfigDir(ctx, e, spec.RuleDir, path.Join(paths.Deploy, "conf"), func(name string) bool {
			return strings.HasSuffix(name, ".rules.yml")
		})
	}

	return nil
}

Expand Down
73 changes: 73 additions & 0 deletions pkg/cluster/spec/prometheus_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package spec

import (
"context"
"io/ioutil"
"os"
"os/user"
"path"
"path/filepath"
"testing"

"github.com/pingcap/tiup/pkg/checkpoint"
"github.com/pingcap/tiup/pkg/cluster/executor"
"github.com/pingcap/tiup/pkg/meta"
"github.com/stretchr/testify/assert"
)

// TestLocalRuleDirs checks that initRules installs into <deploy>/conf both
// the rule files bundled under <deploy>/bin/prometheus and every file found
// in the local RuleDir.
func TestLocalRuleDirs(t *testing.T) {
	deployDir, err := ioutil.TempDir("", "tiup-*")
	assert.Nil(t, err)
	defer os.RemoveAll(deployDir)

	// The bundled rule files live under bin/prometheus in the deploy dir;
	// initRules is expected to copy them from there into the conf dir.
	err = os.MkdirAll(path.Join(deployDir, "bin/prometheus"), 0755)
	assert.Nil(t, err)
	localDir, err := filepath.Abs("./testdata/rules")
	assert.Nil(t, err)

	// Stand-in for a bundled rule file, so the test can tell bundled rules
	// apart from the ones coming from RuleDir.
	err = ioutil.WriteFile(path.Join(deployDir, "bin/prometheus", "dummy.rules.yml"), []byte("dummy"), 0644)
	assert.Nil(t, err)

	topo := new(Specification)
	topo.Monitors = append(topo.Monitors, PrometheusSpec{
		Host:    "127.0.0.1",
		Port:    9090,
		RuleDir: localDir,
	})

	comp := MonitorComponent{topo}
	ints := comp.Instances()

	// Stop on a mismatch: indexing ints[0] below would panic otherwise.
	if !assert.Len(t, ints, 1) {
		return
	}
	promInstance, ok := ints[0].(*MonitorInstance)
	if !assert.True(t, ok, "unexpected instance type %T", ints[0]) {
		return
	}

	// Named currentUser so the os/user package is not shadowed.
	currentUser, err := user.Current()
	assert.Nil(t, err)
	e, err := executor.New(executor.SSHTypeNone, false, executor.SSHConfig{Host: "127.0.0.1", User: currentUser.Username})
	assert.Nil(t, err)

	ctx := checkpoint.NewContext(context.Background())
	err = promInstance.initRules(ctx, e, promInstance.InstanceSpec.(PrometheusSpec), meta.DirPaths{Deploy: deployDir})
	assert.Nil(t, err)

	// The bundled rule must survive even though RuleDir is set ...
	assert.FileExists(t, path.Join(deployDir, "conf", "dummy.rules.yml"))
	// ... and every file from RuleDir must have been installed next to it.
	fs, err := ioutil.ReadDir(localDir)
	assert.Nil(t, err)
	for _, f := range fs {
		assert.FileExists(t, path.Join(deployDir, "conf", f.Name()))
	}
}
15 changes: 15 additions & 0 deletions pkg/cluster/spec/testdata/rules/tidb.rules.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# magic-string-for-test
# NOTE(review): the marker on the first line appears to be what the cluster
# integration script greps for in the deployed tidb.rules.yml to confirm that
# the locally supplied rule file was installed; keep it byte-for-byte.
# Test fixture: a single alerting rule used as a local rule_dir sample.
groups:
- name: alert.rules
rules:
- alert: TiDB_schema_error
expr: increase(tidb_session_schema_lease_error_total{type="outdated"}[15m]) > 0
for: 1m
labels:
env: ENV_LABELS_ENV
level: emergency
expr: increase(tidb_session_schema_lease_error_total{type="outdated"}[15m]) > 0
annotations:
description: "cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values:{{ $value }}"
value: "{{ $value }}"
summary: TiDB schema error
44 changes: 0 additions & 44 deletions templates/config/dm/prometheus.yml.tpl

This file was deleted.

2 changes: 2 additions & 0 deletions tests/tiup-cluster/script/scale_tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ function scale_tools() {
tiup-cluster $client --yes deploy $name $version $topo -i ~/.ssh/id_rsa

# check the local config
tiup-cluster $client exec $name -N n1 --command "ls /home/tidb/deploy/prometheus-9090/conf/{node,tidb,tikv}.rules.yml"
tiup-cluster $client exec $name -N n1 --command "ls /home/tidb/deploy/grafana-3000/dashboards/{tidb,tidb_summary,tikv_summary}.json"
tiup-cluster $client exec $name -N n1 --command "grep magic-string-for-test /home/tidb/deploy/prometheus-9090/conf/tidb.rules.yml"
tiup-cluster $client exec $name -N n1 --command "grep magic-string-for-test /home/tidb/deploy/grafana-3000/dashboards/tidb.json"
tiup-cluster $client exec $name -N n1 --command "grep magic-string-for-test /home/tidb/deploy/alertmanager-9093/conf/alertmanager.yml"
Expand Down