From 952ad22a1455f9dabd69a549aae8da3fef94fd1b Mon Sep 17 00:00:00 2001 From: luancheng Date: Fri, 25 Sep 2020 15:37:37 +0800 Subject: [PATCH 1/5] validate: manual reset pd config back --- cmd/validate.go | 54 +++++++++++++++++++++++++++++++++++++++++ pkg/task/backup.go | 2 +- pkg/task/backup_raw.go | 2 +- pkg/task/common.go | 4 +-- pkg/task/restore.go | 4 +-- pkg/task/restore_log.go | 2 +- pkg/task/restore_raw.go | 2 +- 7 files changed, 62 insertions(+), 8 deletions(-) diff --git a/cmd/validate.go b/cmd/validate.go index 638ca0502..5f7993ea8 100644 --- a/cmd/validate.go +++ b/cmd/validate.go @@ -45,6 +45,7 @@ func NewValidateCommand() *cobra.Command { meta.AddCommand(newBackupMetaCommand()) meta.AddCommand(decodeBackupMetaCommand()) meta.AddCommand(encodeBackupMetaCommand()) + meta.AddCommand(setPDConfigCommand()) meta.Hidden = true return meta @@ -323,3 +324,56 @@ func encodeBackupMetaCommand() *cobra.Command { } return encodeBackupMetaCmd } + +func setPDConfigCommand() *cobra.Command { + pdConfigCmd := &cobra.Command{ + Use: "reset-pd-config-as-default", + Short: "reset pd scheduler and config adjusted by BR to default value", + RunE: func(cmd *cobra.Command, args []string) error { + ctx, cancel := context.WithCancel(GetDefaultContext()) + defer cancel() + + var cfg task.Config + if err := cfg.ParseFromFlags(cmd.Flags()); err != nil { + return err + } + + mgr, err := task.NewMgr(ctx, tidbGlue, cfg.PD, cfg.TLS, cfg.CheckRequirements) + if err != nil { + return err + } + defer mgr.Close() + + // these schedulers open by default + defaultSchedulers := []string{ + "balance-leader-scheduler", + "balance-hot-region-scheduler", + "balance-region-scheduler", + } + for _, sche := range defaultSchedulers { + err := mgr.AddScheduler(ctx, sche) + if err != nil { + return err + } + } + log.Info("add pd schedulers succeed", + zap.Strings("schedulers", defaultSchedulers)) + + // set default config find by + // https://github.com/tikv/pd/blob/master/conf/config.toml + defaultMergeCfg := map[string]interface{}{ + "max-merge-region-keys": 200000, + "max-merge-region-size": 20, + "leader-schedule-limit": 4, + "region-schedule-limit": 2048, + "max-snapshot-count": 3, + } + if err := mgr.UpdatePDScheduleConfig(ctx, defaultMergeCfg); err != nil { + return errors.Annotate(err, "fail to update PD merge config") + } + log.Info("add pd configs succeed", zap.Any("config", defaultMergeCfg)) + return nil + }, + } + return pdConfigCmd +} diff --git a/pkg/task/backup.go b/pkg/task/backup.go index 321bb16e1..ae724d49a 100644 --- a/pkg/task/backup.go +++ b/pkg/task/backup.go @@ -175,7 +175,7 @@ func RunBackup(c context.Context, g glue.Glue, cmdName string, cfg *BackupConfig if err != nil { return err } - mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements) + mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements) if err != nil { return err } diff --git a/pkg/task/backup_raw.go b/pkg/task/backup_raw.go index fa6a15e72..be89fa456 100644 --- a/pkg/task/backup_raw.go +++ b/pkg/task/backup_raw.go @@ -125,7 +125,7 @@ func RunBackupRaw(c context.Context, g glue.Glue, cmdName string, cfg *RawKvConf if err != nil { return err } - mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements) + mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements) if err != nil { return err } diff --git a/pkg/task/common.go b/pkg/task/common.go index 6f21bf359..67cf8e20e 100644 --- a/pkg/task/common.go +++ b/pkg/task/common.go @@ -277,8 +277,8 @@ func (cfg *Config) ParseFromFlags(flags *pflag.FlagSet) error { return cfg.TLS.ParseFromFlags(flags) } -// newMgr creates a new mgr at the given PD address. -func newMgr(ctx context.Context, +// NewMgr creates a new mgr at the given PD address. +func NewMgr(ctx context.Context, g glue.Glue, pds []string, tlsConfig TLSConfig, checkRequirements bool) (*conn.Mgr, error) { diff --git a/pkg/task/restore.go b/pkg/task/restore.go index c91e56abf..3cdeae7a8 100644 --- a/pkg/task/restore.go +++ b/pkg/task/restore.go @@ -93,7 +93,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf ctx, cancel := context.WithCancel(c) defer cancel() - mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements) + mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements) if err != nil { return err } @@ -369,7 +369,7 @@ func RunRestoreTiflashReplica(c context.Context, g glue.Glue, cmdName string, cf ctx, cancel := context.WithCancel(c) defer cancel() - mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements) + mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements) if err != nil { return err } diff --git a/pkg/task/restore_log.go b/pkg/task/restore_log.go index 5c2854e43..986d9a48a 100644 --- a/pkg/task/restore_log.go +++ b/pkg/task/restore_log.go @@ -97,7 +97,7 @@ func RunLogRestore(c context.Context, g glue.Glue, cfg *LogRestoreConfig) error ctx, cancel := context.WithCancel(c) defer cancel() - mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements) + mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements) if err != nil { return err } diff --git a/pkg/task/restore_raw.go b/pkg/task/restore_raw.go index a4ca0a446..64dba80cd 100644 --- a/pkg/task/restore_raw.go +++ b/pkg/task/restore_raw.go @@ -50,7 +50,7 @@ func RunRestoreRaw(c context.Context, g glue.Glue, cmdName string, cfg *RestoreR ctx, cancel := context.WithCancel(c) defer cancel() - mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements) + mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements) if err != nil { return err } From fba4684e7efd23abcaa31da4eac161ebd645f4df Mon Sep 17 00:00:00 2001 From: luancheng Date: Sun, 27 Sep 2020 15:56:28 +0800 Subject: [PATCH 2/5] add integration test for new command --- tests/br_other/run.sh | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/tests/br_other/run.sh b/tests/br_other/run.sh index 7d968610e..f23dfd85c 100644 --- a/tests/br_other/run.sh +++ b/tests/br_other/run.sh @@ -95,8 +95,33 @@ fi # make sure we won't stuck in non-scheduler state, even we send a SIGTERM to it. # give enough time to BR so it can gracefully stop. -sleep 10 -! curl http://$PD_ADDR/pd/api/v1/config/schedule | grep '"disable": true' +sleep 5 +if curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][0]' | grep -q '"disable": false' +then + echo "TEST: [$TEST_NAME] failed because scheduler has not been removed" + exit 1 +fi + +pd_settings=0 +# we need reset pd scheduler/config to default +# until pd has the solution to temporary set these scheduler/configs. +run_br validate reset-pd-config-as-default + +# max-merge-region-size set to default 20 +curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."max-merge-region-size"]' | grep "20" || ((pd_settings++)) +# max-merge-region-keys set to default 200000 +curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."max-merge-region-keys"]' | grep "200000" || ((pd_settings++)) +# balance-region scheduler enabled +curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][0]' | grep '"disable": false' || ((pd_settings++)) +# balance-leader scheduler enabled +curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][1]' | grep '"disable": false' || ((pd_settings++)) +# hot region scheduler enabled +curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][2]' | grep '"disable": false' || ((pd_settings++)) + +if [ "$pd_settings" -ne "5" ];then + echo "TEST: [$TEST_NAME] test validate reset pd config failed!" + exit 1 +fi run_sql "DROP DATABASE $DB;" From b050d8b6dac861bb339f499ac255987955a57a0f Mon Sep 17 00:00:00 2001 From: luancheng Date: Sun, 27 Sep 2020 16:58:44 +0800 Subject: [PATCH 3/5] fix test --- tests/br_other/run.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/br_other/run.sh b/tests/br_other/run.sh index f23dfd85c..dc3395cef 100644 --- a/tests/br_other/run.sh +++ b/tests/br_other/run.sh @@ -102,21 +102,21 @@ then exit 1 fi -pd_settings=0 +pd_settings=5 # we need reset pd scheduler/config to default # until pd has the solution to temporary set these scheduler/configs. run_br validate reset-pd-config-as-default # max-merge-region-size set to default 20 -curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."max-merge-region-size"]' | grep "20" || ((pd_settings++)) +curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."max-merge-region-size"]' | grep "20" || ((pd_settings--)) # max-merge-region-keys set to default 200000 -curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."max-merge-region-keys"]' | grep "200000" || ((pd_settings++)) +curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."max-merge-region-keys"]' | grep "200000" || ((pd_settings--)) # balance-region scheduler enabled -curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][0]' | grep '"disable": false' || ((pd_settings++)) +curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][0]' | grep '"disable": false' || ((pd_settings--)) # balance-leader scheduler enabled -curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][1]' | grep '"disable": false' || ((pd_settings++)) +curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][1]' | grep '"disable": false' || ((pd_settings--)) # hot region scheduler enabled -curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][2]' | grep '"disable": false' || ((pd_settings++)) +curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][2]' | grep '"disable": false' || ((pd_settings--)) if [ "$pd_settings" -ne "5" ];then echo "TEST: [$TEST_NAME] test validate reset pd config failed!" From b4102a06b7e042bc7a36b48c0d6f451b90def660 Mon Sep 17 00:00:00 2001 From: luancheng Date: Sun, 27 Sep 2020 17:23:26 +0800 Subject: [PATCH 4/5] address comment --- cmd/validate.go | 35 ++++++++++++----------------------- pkg/conn/scheduler_utils.go | 15 +++++++++++++-- tests/br_other/run.sh | 4 ++-- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/cmd/validate.go b/cmd/validate.go index 5f7993ea8..86ebcc9e0 100644 --- a/cmd/validate.go +++ b/cmd/validate.go @@ -9,6 +9,7 @@ import ( "encoding/hex" "encoding/json" "fmt" + "strings" "github.com/gogo/protobuf/proto" "github.com/pingcap/errors" @@ -20,6 +21,7 @@ import ( "github.com/tikv/pd/pkg/mock/mockid" "go.uber.org/zap" + "github.com/pingcap/br/pkg/conn" "github.com/pingcap/br/pkg/restore" "github.com/pingcap/br/pkg/rtree" "github.com/pingcap/br/pkg/task" @@ -344,34 +346,21 @@ func setPDConfigCommand() *cobra.Command { } defer mgr.Close() - // these schedulers open by default - defaultSchedulers := []string{ - "balance-leader-scheduler", - "balance-hot-region-scheduler", - "balance-region-scheduler", - } - for _, sche := range defaultSchedulers { - err := mgr.AddScheduler(ctx, sche) - if err != nil { - return err + for scheduler := range conn.Schedulers { + if strings.HasPrefix(scheduler, "balance") { + err := mgr.AddScheduler(ctx, scheduler) + if err != nil { + return err + } + log.Info("add pd schedulers succeed", + zap.String("schedulers", scheduler)) } } - log.Info("add pd schedulers succeed", - zap.Strings("schedulers", defaultSchedulers)) - // set default config find by - // https://github.com/tikv/pd/blob/master/conf/config.toml - defaultMergeCfg := map[string]interface{}{ - "max-merge-region-keys": 200000, - "max-merge-region-size": 20, - "leader-schedule-limit": 4, - "region-schedule-limit": 2048, - "max-snapshot-count": 3, - } - if err := mgr.UpdatePDScheduleConfig(ctx, defaultMergeCfg); err != nil { + if err := mgr.UpdatePDScheduleConfig(ctx, conn.DefaultPDCfg); err != nil { return errors.Annotate(err, "fail to update PD merge config") } - log.Info("add pd configs succeed", zap.Any("config", defaultMergeCfg)) + log.Info("add pd configs succeed", zap.Any("config", conn.DefaultPDCfg)) return nil }, } diff --git a/pkg/conn/scheduler_utils.go b/pkg/conn/scheduler_utils.go index 56b9e12b5..b31f3415b 100644 --- a/pkg/conn/scheduler_utils.go +++ b/pkg/conn/scheduler_utils.go @@ -21,7 +21,8 @@ type clusterConfig struct { } var ( - schedulers = map[string]struct{}{ + // Schedulers represent region/leader schedulers which can impact on performance. + Schedulers = map[string]struct{}{ "balance-leader-scheduler": {}, "balance-hot-region-scheduler": {}, "balance-region-scheduler": {}, @@ -30,6 +31,7 @@ var ( "shuffle-region-scheduler": {}, "shuffle-hot-region-scheduler": {}, } + pdRegionMergeCfg = []string{ "max-merge-region-keys", "max-merge-region-size", @@ -39,6 +41,15 @@ var ( "region-schedule-limit", "max-snapshot-count", } + + // DefaultPDCfg find by https://github.com/tikv/pd/blob/master/conf/config.toml. + DefaultPDCfg = map[string]interface{}{ + "max-merge-region-keys": 200000, + "max-merge-region-size": 20, + "leader-schedule-limit": 4, + "region-schedule-limit": 2048, + "max-snapshot-count": 3, + } ) func addPDLeaderScheduler(ctx context.Context, mgr *Mgr, removedSchedulers []string) error { @@ -101,7 +112,7 @@ func (mgr *Mgr) RemoveSchedulers(ctx context.Context) (undo utils.UndoFunc, err } needRemoveSchedulers := make([]string, 0, len(existSchedulers)) for _, s := range existSchedulers { - if _, ok := schedulers[s]; ok { + if _, ok := Schedulers[s]; ok { needRemoveSchedulers = append(needRemoveSchedulers, s) } } diff --git a/tests/br_other/run.sh b/tests/br_other/run.sh index dc3395cef..baf2531de 100644 --- a/tests/br_other/run.sh +++ b/tests/br_other/run.sh @@ -108,9 +108,9 @@ pd_settings=5 run_br validate reset-pd-config-as-default # max-merge-region-size set to default 20 -curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."max-merge-region-size"]' | grep "20" || ((pd_settings--)) +curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '."max-merge-region-size"' | grep "20" || ((pd_settings--)) # max-merge-region-keys set to default 200000 -curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."max-merge-region-keys"]' | grep "200000" || ((pd_settings--)) +curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '."max-merge-region-keys"' | grep "200000" || ((pd_settings--)) # balance-region scheduler enabled curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][0]' | grep '"disable": false' || ((pd_settings--)) # balance-leader scheduler enabled From 448c0c4a7ecddd685fb1bff7b6ade6fe89226c33 Mon Sep 17 00:00:00 2001 From: luancheng Date: Sun, 27 Sep 2020 19:24:57 +0800 Subject: [PATCH 5/5] address commment --- tests/br_other/run.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/br_other/run.sh b/tests/br_other/run.sh index baf2531de..c4e5f3cf6 100644 --- a/tests/br_other/run.sh +++ b/tests/br_other/run.sh @@ -109,14 +109,15 @@ run_br validate reset-pd-config-as-default # max-merge-region-size set to default 20 curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '."max-merge-region-size"' | grep "20" || ((pd_settings--)) + # max-merge-region-keys set to default 200000 curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '."max-merge-region-keys"' | grep "200000" || ((pd_settings--)) # balance-region scheduler enabled -curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][0]' | grep '"disable": false' || ((pd_settings--)) +curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '."schedulers-v2"[] | {disable: .disable, type: ."type" | select (.=="balance-region")}' | grep '"disable": false' || ((pd_settings--)) # balance-leader scheduler enabled -curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][1]' | grep '"disable": false' || ((pd_settings--)) +curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '."schedulers-v2"[] | {disable: .disable, type: ."type" | select (.=="balance-leader")}' | grep '"disable": false' || ((pd_settings--)) # hot region scheduler enabled -curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][2]' | grep '"disable": false' || ((pd_settings--)) +curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '."schedulers-v2"[] | {disable: .disable, type: ."type" | select (.=="hot-region")}' | grep '"disable": false' || ((pd_settings--)) if [ "$pd_settings" -ne "5" ];then echo "TEST: [$TEST_NAME] test validate reset pd config failed!"