Skip to content
This repository has been archived by the owner on Jul 24, 2024. It is now read-only.

validate: manual reset pd config back #530

Merged
merged 9 commits into from
Sep 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions cmd/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"strings"

"github.com/gogo/protobuf/proto"
"github.com/pingcap/errors"
Expand All @@ -20,6 +21,7 @@ import (
"github.com/tikv/pd/pkg/mock/mockid"
"go.uber.org/zap"

"github.com/pingcap/br/pkg/conn"
"github.com/pingcap/br/pkg/restore"
"github.com/pingcap/br/pkg/rtree"
"github.com/pingcap/br/pkg/task"
Expand All @@ -45,6 +47,7 @@ func NewValidateCommand() *cobra.Command {
meta.AddCommand(newBackupMetaCommand())
meta.AddCommand(decodeBackupMetaCommand())
meta.AddCommand(encodeBackupMetaCommand())
meta.AddCommand(setPDConfigCommand())
meta.Hidden = true

return meta
Expand Down Expand Up @@ -323,3 +326,43 @@ func encodeBackupMetaCommand() *cobra.Command {
}
return encodeBackupMetaCmd
}

// setPDConfigCommand returns the "reset-pd-config-as-default" command.
//
// When run, the command parses the common task flags, connects to PD through
// task.NewMgr, re-adds every scheduler listed in conn.Schedulers whose name
// starts with "balance", and finally applies conn.DefaultPDCfg through
// UpdatePDScheduleConfig. The first failure aborts the run and is returned.
func setPDConfigCommand() *cobra.Command {
	resetPDConfig := func(cmd *cobra.Command, _ []string) error {
		ctx, cancel := context.WithCancel(GetDefaultContext())
		defer cancel()

		var cfg task.Config
		err := cfg.ParseFromFlags(cmd.Flags())
		if err != nil {
			return err
		}

		mgr, err := task.NewMgr(ctx, tidbGlue, cfg.PD, cfg.TLS, cfg.CheckRequirements)
		if err != nil {
			return err
		}
		defer mgr.Close()

		// Only the balance-* schedulers are restored; other entries of
		// conn.Schedulers are left untouched.
		for name := range conn.Schedulers {
			if !strings.HasPrefix(name, "balance") {
				continue
			}
			if addErr := mgr.AddScheduler(ctx, name); addErr != nil {
				return addErr
			}
			log.Info("add pd schedulers succeed",
				zap.String("schedulers", name))
		}

		err = mgr.UpdatePDScheduleConfig(ctx, conn.DefaultPDCfg)
		if err != nil {
			return errors.Annotate(err, "fail to update PD merge config")
		}
		log.Info("add pd configs succeed", zap.Any("config", conn.DefaultPDCfg))
		return nil
	}

	return &cobra.Command{
		Use:   "reset-pd-config-as-default",
		Short: "reset pd scheduler and config adjusted by BR to default value",
		RunE:  resetPDConfig,
	}
}
15 changes: 13 additions & 2 deletions pkg/conn/scheduler_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ type clusterConfig struct {
}

var (
schedulers = map[string]struct{}{
// Schedulers represent region/leader schedulers which can impact performance.
Schedulers = map[string]struct{}{
"balance-leader-scheduler": {},
"balance-hot-region-scheduler": {},
"balance-region-scheduler": {},
Expand All @@ -30,6 +31,7 @@ var (
"shuffle-region-scheduler": {},
"shuffle-hot-region-scheduler": {},
}

pdRegionMergeCfg = []string{
"max-merge-region-keys",
"max-merge-region-size",
Expand All @@ -39,6 +41,15 @@ var (
"region-schedule-limit",
"max-snapshot-count",
}

// DefaultPDCfg holds the default values found in https://github.com/tikv/pd/blob/master/conf/config.toml.
DefaultPDCfg = map[string]interface{}{
"max-merge-region-keys": 200000,
"max-merge-region-size": 20,
"leader-schedule-limit": 4,
"region-schedule-limit": 2048,
"max-snapshot-count": 3,
}
)

func addPDLeaderScheduler(ctx context.Context, mgr *Mgr, removedSchedulers []string) error {
Expand Down Expand Up @@ -101,7 +112,7 @@ func (mgr *Mgr) RemoveSchedulers(ctx context.Context) (undo utils.UndoFunc, err
}
needRemoveSchedulers := make([]string, 0, len(existSchedulers))
for _, s := range existSchedulers {
if _, ok := schedulers[s]; ok {
if _, ok := Schedulers[s]; ok {
needRemoveSchedulers = append(needRemoveSchedulers, s)
}
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/task/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ func RunBackup(c context.Context, g glue.Glue, cmdName string, cfg *BackupConfig
if err != nil {
return err
}
mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/task/backup_raw.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ func RunBackupRaw(c context.Context, g glue.Glue, cmdName string, cfg *RawKvConf
if err != nil {
return err
}
mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
if err != nil {
return err
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/task/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,8 @@ func (cfg *Config) ParseFromFlags(flags *pflag.FlagSet) error {
return cfg.TLS.ParseFromFlags(flags)
}

// newMgr creates a new mgr at the given PD address.
func newMgr(ctx context.Context,
// NewMgr creates a new mgr at the given PD address.
func NewMgr(ctx context.Context,
g glue.Glue, pds []string,
tlsConfig TLSConfig,
checkRequirements bool) (*conn.Mgr, error) {
Expand Down
4 changes: 2 additions & 2 deletions pkg/task/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf
ctx, cancel := context.WithCancel(c)
defer cancel()

mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
if err != nil {
return err
}
Expand Down Expand Up @@ -369,7 +369,7 @@ func RunRestoreTiflashReplica(c context.Context, g glue.Glue, cmdName string, cf
ctx, cancel := context.WithCancel(c)
defer cancel()

mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/task/restore_log.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func RunLogRestore(c context.Context, g glue.Glue, cfg *LogRestoreConfig) error
ctx, cancel := context.WithCancel(c)
defer cancel()

mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/task/restore_raw.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func RunRestoreRaw(c context.Context, g glue.Glue, cmdName string, cfg *RestoreR
ctx, cancel := context.WithCancel(c)
defer cancel()

mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
if err != nil {
return err
}
Expand Down
30 changes: 28 additions & 2 deletions tests/br_other/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,34 @@ fi

# make sure we won't get stuck in a non-scheduler state, even if we send a SIGTERM to it.
# give enough time to BR so it can gracefully stop.
sleep 10
! curl http://$PD_ADDR/pd/api/v1/config/schedule | grep '"disable": true'
sleep 5
if curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][0]' | grep -q '"disable": false'
then
echo "TEST: [$TEST_NAME] failed because scheduler has not been removed"
exit 1
fi

pd_settings=5
# we need to reset the pd schedulers/configs to their defaults
# until pd has a solution to temporarily set these schedulers/configs.
run_br validate reset-pd-config-as-default

# max-merge-region-size set to default 20
curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '."max-merge-region-size"' | grep "20" || ((pd_settings--))

# max-merge-region-keys set to default 200000
curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '."max-merge-region-keys"' | grep "200000" || ((pd_settings--))
# balance-region scheduler enabled
curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '."schedulers-v2"[] | {disable: .disable, type: ."type" | select (.=="balance-region")}' | grep '"disable": false' || ((pd_settings--))
# balance-leader scheduler enabled
curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '."schedulers-v2"[] | {disable: .disable, type: ."type" | select (.=="balance-leader")}' | grep '"disable": false' || ((pd_settings--))
# hot region scheduler enabled
curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '."schedulers-v2"[] | {disable: .disable, type: ."type" | select (.=="hot-region")}' | grep '"disable": false' || ((pd_settings--))

if [ "$pd_settings" -ne "5" ];then
echo "TEST: [$TEST_NAME] test validate reset pd config failed!"
exit 1
fi

run_sql "DROP DATABASE $DB;"

Expand Down