diff --git a/_utils/terror_gen/errors_release.txt b/_utils/terror_gen/errors_release.txt index 82e948e554..825e8372bf 100644 --- a/_utils/terror_gen/errors_release.txt +++ b/_utils/terror_gen/errors_release.txt @@ -328,10 +328,9 @@ ErrMasterGenEmbedEtcdConfigFail,[code=38037:class=dm-master:scope=internal:level ErrMasterStartEmbedEtcdFail,[code=38038:class=dm-master:scope=internal:level=high],"fail to start embed etcd" ErrMasterParseURLFail,[code=38039:class=dm-master:scope=internal:level=high],"fail to parse URL %s" ErrMasterJoinEmbedEtcdFail,[code=38040:class=dm-master:scope=internal:level=high],"fail to join embed etcd: %s" -ErrMasterCoordinatorNotStart,[code=38041:class=dm-master:scope=internal:level=high],"coordinator does not start" -ErrMasterAcquireWorkerFailed,[code=38042:class=dm-master:scope=internal:level=medium],"acquire worker failed: %s" -ErrMasterAdvertiseAddrNotValid,[code=38043:class=dm-master:scope=internal:level=high],"advertise address %s not valid" -ErrMasterRequestIsNotForwardToLeader,[code=38044:class=dm-master:scope=internal:level=high],"master is not leader, and can't forward request to leader" +ErrMasterInvalidOperateTaskOp,[code=38041:class=dm-master:scope=internal:level=medium],"invalid op %s on task" +ErrMasterAdvertiseAddrNotValid,[code=38042:class=dm-master:scope=internal:level=high],"advertise address %s not valid" +ErrMasterRequestIsNotForwardToLeader,[code=38043:class=dm-master:scope=internal:level=high],"master is not leader, and can't forward request to leader" ErrWorkerParseFlagSet,[code=40001:class=dm-worker:scope=internal:level=medium],"parse dm-worker config flag set" ErrWorkerInvalidFlag,[code=40002:class=dm-worker:scope=internal:level=medium],"'%s' is an invalid flag" ErrWorkerDecodeConfigFromFile,[code=40003:class=dm-worker:scope=internal:level=medium],"toml decode file" @@ -422,3 +421,20 @@ ErrSchemaTrackerCannotGetTable,[code=44005:class=schema-tracker:scope=internal:l ErrSchemaTrackerCannotExecDDL,[code=44006:class=schema-tracker:scope=internal:level=high],"cannot track DDL: %s" ErrSchemaTrackerCannotFetchDownstreamTable,[code=44007:class=schema-tracker:scope=downstream:level=medium],"cannot fetch downstream table schema of `%s`.`%s` to initialize upstream schema `%s`.`%s` in schema tracker" ErrSchemaTrackerCannotParseDownstreamTable,[code=44008:class=schema-tracker:scope=internal:level=high],"cannot parse downstream table schema of `%s`.`%s` to initialize upstream schema `%s`.`%s` in schema tracker" +ErrSchedulerNotStarted,[code=46001:class=scheduler:scope=internal:level=high],"the scheduler has not started" +ErrSchedulerStarted,[code=46002:class=scheduler:scope=internal:level=medium],"the scheduler has already started" +ErrSchedulerWorkerExist,[code=46003:class=scheduler:scope=internal:level=medium],"dm-worker with name %s already exists" +ErrSchedulerWorkerNotExist,[code=46004:class=scheduler:scope=internal:level=medium],"dm-worker with name %s not exists" +ErrSchedulerWorkerOnline,[code=46005:class=scheduler:scope=internal:level=medium],"dm-worker with name %s is still online, must shut it down first" +ErrSchedulerWorkerInvalidTrans,[code=46006:class=scheduler:scope=internal:level=medium],"invalid stage transformation for dm-worker %s, from %s to %s" +ErrSchedulerSourceCfgExist,[code=46007:class=scheduler:scope=internal:level=medium],"source config with ID %s already exists" +ErrSchedulerSourceCfgNotExist,[code=46008:class=scheduler:scope=internal:level=medium],"source config with ID %s not exists" +ErrSchedulerSourcesUnbound,[code=46009:class=dm-master:scope=internal:level=medium],"sources %v have not bound" +ErrSchedulerSourceOpTaskExist,[code=46010:class=dm-master:scope=internal:level=medium],"source with name % need to operate with tasks %v exist" +ErrSchedulerRelayStageInvalidUpdate,[code=46011:class=scheduler:scope=internal:level=medium],"invalid new expectant relay stage %s" +ErrSchedulerRelayStageSourceNotExist,[code=46012:class=scheduler:scope=internal:level=medium],"sources %v need to update expectant relay stage not exist" +ErrSchedulerMultiTask,[code=46013:class=scheduler:scope=internal:level=medium],"the scheduler cannot perform multiple different tasks %v in one operation" +ErrSchedulerSubTaskExist,[code=46014:class=scheduler:scope=internal:level=medium],"subtasks with name %s for sources %v already exist" +ErrSchedulerSubTaskStageInvalidUpdate,[code=46015:class=dm-master:scope=internal:level=medium],"invalid new expectant subtask stage %s" +ErrSchedulerSubTaskOpTaskNotExist,[code=46016:class=dm-master:scope=internal:level=medium],"subtasks with name %s need to be operate not exist" +ErrSchedulerSubTaskOpSourceNotExist,[code=46017:class=dm-master:scope=internal:level=medium],"sources %v need to be operate not exist" diff --git a/cmd/dm-syncer/config.go b/cmd/dm-syncer/config.go index 49c3ad7454..fccf2c6ecc 100644 --- a/cmd/dm-syncer/config.go +++ b/cmd/dm-syncer/config.go @@ -308,11 +308,10 @@ func (oc *oldConfig) convertToNewFormat() (*config.SubTaskConfig, error) { return nil, errors.Trace(err) } newTask := &config.SubTaskConfig{ - Name: "dm-syncer", - SourceID: "dm-syncer-from-old-config", - DisableHeartbeat: true, - Mode: config.ModeIncrement, - Meta: meta, + Name: "dm-syncer", + SourceID: "dm-syncer-from-old-config", + Mode: config.ModeIncrement, + Meta: meta, LogLevel: oc.LogLevel, LogFile: oc.LogFile, diff --git a/dm/common/common.go b/dm/common/common.go index d3e0be3991..b849667b4c 100644 --- a/dm/common/common.go +++ b/dm/common/common.go @@ -25,20 +25,26 @@ import ( var ( useOfClosedErrMsg = "use of closed network connection" // WorkerRegisterKeyAdapter used to encode and decode register key. - // k/v: Encode(addr) -> name + // k/v: Encode(name) -> the information of the DM-worker node. WorkerRegisterKeyAdapter KeyAdapter = keyHexEncoderDecoder("/dm-worker/r/") // WorkerKeepAliveKeyAdapter used to encode and decode keepalive key. - // k/v: Encode(addr,name) -> time + // k/v: Encode(name) -> time WorkerKeepAliveKeyAdapter KeyAdapter = keyHexEncoderDecoder("/dm-worker/a/") // UpstreamConfigKeyAdapter store all config of which MySQL-task has not stopped. // k/v: Encode(source-id) -> config UpstreamConfigKeyAdapter KeyAdapter = keyEncoderDecoder("/dm-master/upstream/config/") // UpstreamBoundWorkerKeyAdapter used to store address of worker in which MySQL-tasks which are running. - // k/v: Encode(addr) -> source-id + // k/v: Encode(name) -> the bound relationship. UpstreamBoundWorkerKeyAdapter KeyAdapter = keyHexEncoderDecoder("/dm-master/bound-worker/") // UpstreamSubTaskKeyAdapter used to store SubTask which are subscribing data from MySQL source. // k/v: Encode(source-id, task-name) -> SubTaskConfig UpstreamSubTaskKeyAdapter KeyAdapter = keyHexEncoderDecoder("/dm-master/upstream/subtask/") + // StageRelayKeyAdapter used to store the running stage of the relay. + // k/v: Encode(source-id) -> the running stage of the relay. + StageRelayKeyAdapter KeyAdapter = keyEncoderDecoder("/dm-master/stage/relay/") + // StageSubTaskKeyAdapter used to store the running stage of the subtask. + // k/v: Encode(source-id, task-name) -> the running stage of the subtask. + StageSubTaskKeyAdapter KeyAdapter = keyHexEncoderDecoder("/dm-master/stage/subtask/") // ShardDDLPessimismInfoKeyAdapter used to store shard DDL info in pessimistic model. // k/v: Encode(task-name, source-id) -> shard DDL info @@ -50,9 +56,10 @@ var ( func keyAdapterKeysLen(s KeyAdapter) int { switch s { - case WorkerRegisterKeyAdapter, UpstreamConfigKeyAdapter, UpstreamBoundWorkerKeyAdapter: + case WorkerRegisterKeyAdapter, UpstreamConfigKeyAdapter, UpstreamBoundWorkerKeyAdapter, + WorkerKeepAliveKeyAdapter, StageRelayKeyAdapter: return 1 - case WorkerKeepAliveKeyAdapter, UpstreamSubTaskKeyAdapter: + case UpstreamSubTaskKeyAdapter, StageSubTaskKeyAdapter: return 2 } return -1 diff --git a/dm/common/common_test.go b/dm/common/common_test.go index 50895a99df..85c9b45c54 100644 --- a/dm/common/common_test.go +++ b/dm/common/common_test.go @@ -39,9 +39,9 @@ func (t *testCommon) TestKeyAdapter(c *C) { want: "/dm-worker/r/3132372e302e302e313a32333832", }, { - keys: []string{"127.0.0.1:2382", "worker1"}, + keys: []string{"worker1"}, adapter: WorkerKeepAliveKeyAdapter, - want: "/dm-worker/a/3132372e302e302e313a32333832/776f726b657231", + want: "/dm-worker/a/776f726b657231", }, { keys: []string{"mysql1"}, diff --git a/dm/config/checker_config.go b/dm/config/checker_config.go index 5c2b2c29ba..84de712d92 100644 --- a/dm/config/checker_config.go +++ b/dm/config/checker_config.go @@ -21,12 +21,14 @@ type Duration struct { } // MarshalText hacks to satisfy the encoding.TextMarshaler interface +// For MarshalText, we should use (d Duration) which can be used by both pointer and instance func (d Duration) MarshalText() ([]byte, error) { return []byte(d.Duration.String()), nil } // UnmarshalText hacks to satisfy the encoding.TextUnmarshaler interface -func (d Duration) UnmarshalText(text []byte) error { +// For UnmarshalText, we should use (d *Duration) to change the value of this instance instead of the copy +func (d *Duration) UnmarshalText(text []byte) error { var err error d.Duration, err = time.ParseDuration(string(text)) return err @@ -47,10 +49,10 @@ type CheckerConfig struct { BackoffRollback Duration `toml:"backoff-rollback" json:"backoff-rollback"` BackoffMax Duration `toml:"backoff-max" json:"backoff-max"` // unexpose config - CheckInterval Duration `json:"-"` - BackoffMin Duration `json:"-"` - BackoffJitter bool `json:"-"` - BackoffFactor float64 `json:"-"` + CheckInterval Duration `toml:"check-interval" json:"-"` + BackoffMin Duration `toml:"backoff-min" json:"-"` + BackoffJitter bool `toml:"backoff-jitter" json:"-"` + BackoffFactor float64 `toml:"backoff-factor" json:"-"` } // Adjust sets default value for field: CheckInterval/BackoffMin/BackoffJitter/BackoffFactor diff --git a/dm/config/mysql_config.go b/dm/config/source_config.go similarity index 86% rename from dm/config/mysql_config.go rename to dm/config/source_config.go index f6a3b373fe..84fde3bde9 100644 --- a/dm/config/mysql_config.go +++ b/dm/config/source_config.go @@ -11,13 +11,14 @@ import ( "time" "github.com/BurntSushi/toml" + "github.com/siddontang/go-mysql/mysql" + "github.com/pingcap/dm/pkg/binlog" "github.com/pingcap/dm/pkg/gtid" "github.com/pingcap/dm/pkg/log" "github.com/pingcap/dm/pkg/terror" "github.com/pingcap/dm/pkg/tracing" "github.com/pingcap/dm/pkg/utils" - "github.com/siddontang/go-mysql/mysql" ) const ( @@ -39,8 +40,8 @@ type PurgeConfig struct { RemainSpace int64 `toml:"remain-space" json:"remain-space"` // if remain space in @RelayBaseDir less than @RemainSpace (GB), then it can be purged } -// MysqlConfig is the configuration for Worker -type MysqlConfig struct { +// SourceConfig is the configuration for Worker +type SourceConfig struct { EnableGTID bool `toml:"enable-gtid" json:"enable-gtid"` AutoFixGTID bool `toml:"auto-fix-gtid" json:"auto-fix-gtid"` RelayDir string `toml:"relay-dir" json:"relay-dir"` @@ -69,9 +70,9 @@ type MysqlConfig struct { ServerID uint32 `toml:"server-id" json:"server-id"` } -// NewMysqlConfig creates a new base config for worker. -func NewMysqlConfig() *MysqlConfig { - c := &MysqlConfig{ +// NewSourceConfig creates a new base config for upstream MySQL/MariaDB source. +func NewSourceConfig() *SourceConfig { + c := &SourceConfig{ RelayDir: "relay-dir", Purge: PurgeConfig{ Interval: 60 * 60, @@ -95,14 +96,14 @@ func NewMysqlConfig() *MysqlConfig { } // Clone clones a config -func (c *MysqlConfig) Clone() *MysqlConfig { - clone := &MysqlConfig{} +func (c *SourceConfig) Clone() *SourceConfig { + clone := &SourceConfig{} *clone = *c return clone } // Toml returns TOML format representation of config -func (c *MysqlConfig) Toml() (string, error) { +func (c *SourceConfig) Toml() (string, error) { var b bytes.Buffer err := toml.NewEncoder(&b).Encode(c) @@ -114,14 +115,14 @@ func (c *MysqlConfig) Toml() (string, error) { } // Parse parses flag definitions from the argument list. -func (c *MysqlConfig) Parse(content string) error { +func (c *SourceConfig) Parse(content string) error { // Parse first to get config file. metaData, err := toml.Decode(content, c) return c.check(&metaData, err) } // EncodeToml encodes config. -func (c *MysqlConfig) EncodeToml() (string, error) { +func (c *SourceConfig) EncodeToml() (string, error) { buf := new(bytes.Buffer) if err := toml.NewEncoder(buf).Encode(c); err != nil { return "", err @@ -129,7 +130,7 @@ func (c *MysqlConfig) EncodeToml() (string, error) { return buf.String(), nil } -func (c *MysqlConfig) String() string { +func (c *SourceConfig) String() string { cfg, err := json.Marshal(c) if err != nil { log.L().Error("fail to marshal config to json", log.ShortError(err)) @@ -137,13 +138,13 @@ func (c *MysqlConfig) String() string { return string(cfg) } -func (c *MysqlConfig) adjust() { +func (c *SourceConfig) adjust() { c.From.Adjust() c.Checker.Adjust() } // Verify verifies the config -func (c *MysqlConfig) Verify() error { +func (c *SourceConfig) Verify() error { if len(c.SourceID) == 0 { return terror.ErrWorkerNeedSourceID.Generate() } @@ -175,7 +176,7 @@ func (c *MysqlConfig) Verify() error { } // DecryptPassword returns a decrypted config replica in config -func (c *MysqlConfig) DecryptPassword() (*MysqlConfig, error) { +func (c *SourceConfig) DecryptPassword() (*SourceConfig, error) { clone := c.Clone() var ( pswdFrom string @@ -192,7 +193,7 @@ func (c *MysqlConfig) DecryptPassword() (*MysqlConfig, error) { } // GenerateDBConfig creates DBConfig for DB -func (c *MysqlConfig) GenerateDBConfig() (*DBConfig, error) { +func (c *SourceConfig) GenerateDBConfig() (*DBConfig, error) { // decrypt password clone, err := c.DecryptPassword() if err != nil { @@ -203,8 +204,8 @@ func (c *MysqlConfig) GenerateDBConfig() (*DBConfig, error) { return from, nil } -// Adjust flavor and serverid of MysqlConfig -func (c *MysqlConfig) Adjust(db *sql.DB) (err error) { +// Adjust flavor and serverid of SourceConfig +func (c *SourceConfig) Adjust(db *sql.DB) (err error) { c.From.Adjust() c.Checker.Adjust() @@ -227,7 +228,7 @@ func (c *MysqlConfig) Adjust(db *sql.DB) (err error) { } // AdjustFlavor adjust Flavor from DB -func (c *MysqlConfig) AdjustFlavor(ctx context.Context, db *sql.DB) (err error) { +func (c *SourceConfig) AdjustFlavor(ctx context.Context, db *sql.DB) (err error) { if c.Flavor != "" { switch c.Flavor { case mysql.MariaDBFlavor, mysql.MySQLFlavor: @@ -245,7 +246,7 @@ func (c *MysqlConfig) AdjustFlavor(ctx context.Context, db *sql.DB) (err error) } // AdjustServerID adjust server id from DB -func (c *MysqlConfig) AdjustServerID(ctx context.Context, db *sql.DB) error { +func (c *SourceConfig) AdjustServerID(ctx context.Context, db *sql.DB) error { if c.ServerID != 0 { return nil } @@ -273,12 +274,12 @@ func (c *MysqlConfig) AdjustServerID(ctx context.Context, db *sql.DB) error { } // LoadFromFile loads config from file. -func (c *MysqlConfig) LoadFromFile(path string) error { +func (c *SourceConfig) LoadFromFile(path string) error { metaData, err := toml.DecodeFile(path, c) return c.check(&metaData, err) } -func (c *MysqlConfig) check(metaData *toml.MetaData, err error) error { +func (c *SourceConfig) check(metaData *toml.MetaData, err error) error { if err != nil { return terror.ErrWorkerDecodeConfigFromFile.Delegate(err) } diff --git a/dm/config/mysql_config_test.go b/dm/config/source_config_test.go similarity index 84% rename from dm/config/mysql_config_test.go rename to dm/config/source_config_test.go index 67963aa99d..fbbf9df490 100644 --- a/dm/config/mysql_config_test.go +++ b/dm/config/source_config_test.go @@ -20,16 +20,17 @@ import ( "io/ioutil" "path" "strings" + "time" "github.com/DATA-DOG/go-sqlmock" . "github.com/pingcap/check" "github.com/siddontang/go-mysql/mysql" ) -const mysqlTomlPath = "../worker/dm-mysql.toml" +const mysqlTomlPath = "../worker/source.toml" func (t *testConfig) TestConfig(c *C) { - cfg := &MysqlConfig{} + cfg := &SourceConfig{} c.Assert(cfg.LoadFromFile(mysqlTomlPath), IsNil) cfg.RelayDir = "./xx" @@ -73,6 +74,19 @@ func (t *testConfig) TestConfig(c *C) { c.Assert(err, IsNil) c.Assert(clone3, DeepEquals, cfg) + // test toml and parse again + clone4 := cfg.Clone() + clone4.Checker.CheckEnable = true + clone4.Checker.BackoffRollback = Duration{time.Minute * 5} + clone4.Checker.BackoffMax = Duration{time.Minute * 5} + clone4toml, err := clone4.Toml() + c.Assert(err, IsNil) + c.Assert(clone4toml, Matches, "(.|\n)*backoff-rollback = \"5m(.|\n)*") + c.Assert(clone4toml, Matches, "(.|\n)*backoff-max = \"5m(.|\n)*") + clone5 := SourceConfig{} + c.Assert(clone5.Parse(clone4toml), IsNil) + c.Assert(clone5, DeepEquals, *clone4) + // test invalid config dir2 := c.MkDir() configFile := path.Join(dir2, "dm-worker-invalid.toml") @@ -88,24 +102,24 @@ aaa = "xxx" } func (t *testConfig) TestConfigVerify(c *C) { - newConfig := func() *MysqlConfig { - cfg := &MysqlConfig{} + newConfig := func() *SourceConfig { + cfg := &SourceConfig{} c.Assert(cfg.LoadFromFile(mysqlTomlPath), IsNil) cfg.RelayDir = "./xx" return cfg } testCases := []struct { - genFunc func() *MysqlConfig + genFunc func() *SourceConfig errorFormat string }{ { - func() *MysqlConfig { + func() *SourceConfig { return newConfig() }, "", }, { - func() *MysqlConfig { + func() *SourceConfig { cfg := newConfig() cfg.SourceID = "" return cfg @@ -113,7 +127,7 @@ func (t *testConfig) TestConfigVerify(c *C) { ".*dm-worker should bind a non-empty source ID which represents a MySQL/MariaDB instance or a replica group.*", }, { - func() *MysqlConfig { + func() *SourceConfig { cfg := newConfig() cfg.SourceID = "source-id-length-more-than-thirty-two" return cfg @@ -121,7 +135,7 @@ func (t *testConfig) TestConfigVerify(c *C) { fmt.Sprintf(".*the length of source ID .* is more than max allowed value %d", MaxSourceIDLength), }, { - func() *MysqlConfig { + func() *SourceConfig { cfg := newConfig() cfg.EnableRelay = true cfg.RelayBinLogName = "mysql-binlog" @@ -130,7 +144,7 @@ func (t *testConfig) TestConfigVerify(c *C) { ".*not valid.*", }, { - func() *MysqlConfig { + func() *SourceConfig { cfg := newConfig() cfg.RelayBinLogName = "mysql-binlog" return cfg @@ -138,7 +152,7 @@ func (t *testConfig) TestConfigVerify(c *C) { "", }, { - func() *MysqlConfig { + func() *SourceConfig { cfg := newConfig() cfg.EnableRelay = true cfg.RelayBinlogGTID = "9afe121c-40c2-11e9-9ec7-0242ac110002:1-rtc" @@ -147,7 +161,7 @@ func (t *testConfig) TestConfigVerify(c *C) { ".*relay-binlog-gtid 9afe121c-40c2-11e9-9ec7-0242ac110002:1-rtc:.*", }, { - func() *MysqlConfig { + func() *SourceConfig { cfg := newConfig() cfg.From.Password = "not-encrypt" return cfg @@ -170,7 +184,7 @@ func (t *testConfig) TestConfigVerify(c *C) { } -func subtestFlavor(c *C, cfg *MysqlConfig, sqlInfo, expectedFlavor, expectedError string) { +func subtestFlavor(c *C, cfg *SourceConfig, sqlInfo, expectedFlavor, expectedError string) { cfg.Flavor = "" db, mock, err := sqlmock.New() c.Assert(err, IsNil) @@ -189,7 +203,7 @@ func subtestFlavor(c *C, cfg *MysqlConfig, sqlInfo, expectedFlavor, expectedErro } func (t *testConfig) TestAdjustFlavor(c *C) { - cfg := &MysqlConfig{} + cfg := &SourceConfig{} c.Assert(cfg.LoadFromFile(mysqlTomlPath), IsNil) cfg.RelayDir = "./xx" @@ -212,7 +226,7 @@ func (t *testConfig) TestAdjustServerID(c *C) { }() getAllServerIDFunc = getMockServerIDs - cfg := &MysqlConfig{} + cfg := &SourceConfig{} c.Assert(cfg.LoadFromFile(mysqlTomlPath), IsNil) cfg.RelayDir = "./xx" diff --git a/dm/config/subtask.go b/dm/config/subtask.go index 4cb16338e8..b31358f1ea 100644 --- a/dm/config/subtask.go +++ b/dm/config/subtask.go @@ -29,7 +29,7 @@ import ( bf "github.com/pingcap/tidb-tools/pkg/binlog-filter" column "github.com/pingcap/tidb-tools/pkg/column-mapping" "github.com/pingcap/tidb-tools/pkg/filter" - "github.com/pingcap/tidb-tools/pkg/table-router" + router "github.com/pingcap/tidb-tools/pkg/table-router" "go.uber.org/zap" ) @@ -149,7 +149,6 @@ type SubTaskConfig struct { Flavor string `toml:"flavor" json:"flavor"` MetaSchema string `toml:"meta-schema" json:"meta-schema"` RemoveMeta bool `toml:"remove-meta" json:"remove-meta"` - DisableHeartbeat bool `toml:"disable-heartbeat" json:"disable-heartbeat"` // deprecated, use !enable-heartbeat instead HeartbeatUpdateInterval int `toml:"heartbeat-update-interval" json:"heartbeat-update-interval"` HeartbeatReportInterval int `toml:"heartbeat-report-interval" json:"heartbeat-report-interval"` EnableHeartbeat bool `toml:"enable-heartbeat" json:"enable-heartbeat"` @@ -268,10 +267,6 @@ func (c *SubTaskConfig) Adjust() error { c.MetaSchema = defaultMetaSchema } - if !c.DisableHeartbeat { - c.EnableHeartbeat = true - } - if c.Timezone != "" { _, err := time.LoadLocation(c.Timezone) if err != nil { diff --git a/dm/config/task.go b/dm/config/task.go index 84403b9171..d64e99c839 100644 --- a/dm/config/task.go +++ b/dm/config/task.go @@ -239,7 +239,6 @@ type TaskConfig struct { // remove meta from downstreaming database // now we delete checkpoint and online ddl information RemoveMeta bool `yaml:"remove-meta"` - DisableHeartbeat bool `yaml:"disable-heartbeat"` // deprecated, use !enable-heartbeat instead EnableHeartbeat bool `yaml:"enable-heartbeat"` HeartbeatUpdateInterval int `yaml:"heartbeat-update-interval"` HeartbeatReportInterval int `yaml:"heartbeat-report-interval"` @@ -270,7 +269,6 @@ func NewTaskConfig() *TaskConfig { cfg := &TaskConfig{ // explicitly set default value MetaSchema: defaultMetaSchema, - DisableHeartbeat: !defaultEnableHeartbeat, EnableHeartbeat: defaultEnableHeartbeat, HeartbeatUpdateInterval: defaultUpdateInterval, HeartbeatReportInterval: defaultReportInterval, @@ -486,8 +484,7 @@ func (c *TaskConfig) SubTaskConfigs(sources map[string]DBConfig) ([]*SubTaskConf cfg.CaseSensitive = c.CaseSensitive cfg.MetaSchema = c.MetaSchema cfg.RemoveMeta = c.RemoveMeta - cfg.DisableHeartbeat = c.DisableHeartbeat - cfg.EnableHeartbeat = c.EnableHeartbeat || !c.DisableHeartbeat + cfg.EnableHeartbeat = c.EnableHeartbeat cfg.HeartbeatUpdateInterval = c.HeartbeatUpdateInterval cfg.HeartbeatReportInterval = c.HeartbeatReportInterval cfg.Timezone = c.Timezone diff --git a/dm/ctl/ctl.go b/dm/ctl/ctl.go index 071ff530a6..aac8688266 100644 --- a/dm/ctl/ctl.go +++ b/dm/ctl/ctl.go @@ -75,7 +75,7 @@ func NewRootCmd() *cobra.Command { master.NewUpdateRelayCmd(), master.NewPurgeRelayCmd(), master.NewMigrateRelayCmd(), - master.NewOperateMysqlWorkerCmd(), + master.NewOperateSourceCmd(), master.NewOfflineWorkerCmd(), ) return cmd diff --git a/dm/ctl/master/operate_mysql_worker.go b/dm/ctl/master/operate_source.go similarity index 72% rename from dm/ctl/master/operate_mysql_worker.go rename to dm/ctl/master/operate_source.go index b98128a455..1843e867b1 100644 --- a/dm/ctl/master/operate_mysql_worker.go +++ b/dm/ctl/master/operate_source.go @@ -15,38 +15,39 @@ package master import ( "context" + "os" + "github.com/pingcap/dm/dm/ctl/common" "github.com/pingcap/dm/dm/pb" "github.com/pingcap/errors" "github.com/spf13/cobra" - "os" ) -// NewOperateMysqlWorkerCmd creates a OperateMysqlWorker command -func NewOperateMysqlWorkerCmd() *cobra.Command { +// NewOperateSourceCmd creates a OperateSource command +func NewOperateSourceCmd() *cobra.Command { cmd := &cobra.Command{ - Use: "operate-worker ", - Short: "create/update/stop mysql task", - Run: operateMysqlWorkerFunc, + Use: "operate-source ", + Short: "create/update/stop upstream MySQL/MariaDB source", + Run: operateSourceFunc, } return cmd } -func convertCmdType(t string) pb.WorkerOp { +func convertCmdType(t string) pb.SourceOp { switch t { case "create": - return pb.WorkerOp_StartWorker + return pb.SourceOp_StartSource case "update": - return pb.WorkerOp_UpdateConfig + return pb.SourceOp_UpdateSource case "stop": - return pb.WorkerOp_StopWorker + return pb.SourceOp_StopSource default: - return pb.WorkerOp_InvalidWorkerOp + return pb.SourceOp_InvalidSourceOp } } // operateMysqlFunc does migrate relay request -func operateMysqlWorkerFunc(cmd *cobra.Command, _ []string) { +func operateSourceFunc(cmd *cobra.Command, _ []string) { if len(cmd.Flags().Args()) != 2 { cmd.SetOut(os.Stdout) cmd.Usage() @@ -64,13 +65,13 @@ func operateMysqlWorkerFunc(cmd *cobra.Command, _ []string) { defer cancel() op := convertCmdType(cmdType) - if op == pb.WorkerOp_InvalidWorkerOp { + if op == pb.SourceOp_InvalidSourceOp { common.PrintLines("invalid operate '%s' on worker", cmdType) return } cli := common.MasterClient() - resp, err := cli.OperateMysqlWorker(ctx, &pb.MysqlWorkerRequest{ + resp, err := cli.OperateSource(ctx, &pb.OperateSourceRequest{ Config: string(content), Op: op, }) diff --git a/dm/master/coordinator/coordinator.go b/dm/master/coordinator/coordinator.go deleted file mode 100644 index dea59efb91..0000000000 --- a/dm/master/coordinator/coordinator.go +++ /dev/null @@ -1,504 +0,0 @@ -// Copyright 2019 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package coordinator - -import ( - "context" - "sync" - "time" - - "go.etcd.io/etcd/clientv3" - "go.etcd.io/etcd/mvcc/mvccpb" - "go.uber.org/zap" - - "github.com/pingcap/dm/dm/common" - "github.com/pingcap/dm/dm/config" - "github.com/pingcap/dm/dm/master/workerrpc" - "github.com/pingcap/dm/dm/pb" - "github.com/pingcap/dm/pkg/log" - "github.com/pingcap/dm/pkg/terror" -) - -var ( - etcdTimeout = 3 * time.Second - restartMysqlWorkerTimeout = 5 * time.Second -) - -// Coordinator coordinate wrokers and upstream. -type Coordinator struct { - mu sync.RWMutex - // address -> worker - workers map[string]*Worker - // upstream(source-id) -> worker - upstreams map[string]*Worker - - // upstream(address) -> source-id - workerToSource map[string]string - - // sourceConfigs (source) -> config - sourceConfigs map[string]config.MysqlConfig - - // pending create task (sourceid) --> address - pendingReqSources map[string]string - - waitingTask chan string - etcdCli *clientv3.Client - ctx context.Context - cancel context.CancelFunc - started bool - wg sync.WaitGroup -} - -// NewCoordinator returns a coordinate. -func NewCoordinator() *Coordinator { - return &Coordinator{ - workers: make(map[string]*Worker), - workerToSource: make(map[string]string), - pendingReqSources: make(map[string]string), - upstreams: make(map[string]*Worker), - sourceConfigs: make(map[string]config.MysqlConfig), - waitingTask: make(chan string, 100000), - } -} - -// Start starts the coordinator and would recover infomation from etcd. -func (c *Coordinator) Start(ctx context.Context, etcdClient *clientv3.Client) error { - // TODO: recover upstreams and workerToSource and workers - // workers - c.mu.Lock() - defer c.mu.Unlock() - c.etcdCli = etcdClient - - // recovering. - ectx, cancel := context.WithTimeout(etcdClient.Ctx(), etcdTimeout) - defer cancel() - resp, err := etcdClient.Get(ectx, common.WorkerRegisterKeyAdapter.Path(), clientv3.WithPrefix()) - if err != nil { - return err - } - - for _, kv := range resp.Kvs { - kvs, err := common.WorkerRegisterKeyAdapter.Decode(string(kv.Key)) - if err != nil { - return terror.Annotate(err, "decode worker register key from etcd failed") - } - addr := kvs[0] - name := string(kv.Value) - c.workers[addr] = NewWorker(name, addr, nil) - log.L().Info("load worker successful", zap.String("addr", addr), zap.String("name", name)) - } - - resp, err = etcdClient.Get(ectx, common.UpstreamConfigKeyAdapter.Path(), clientv3.WithPrefix()) - if err != nil { - return nil - } - - for _, kv := range resp.Kvs { - kvs, err := common.UpstreamConfigKeyAdapter.Decode(string(kv.Key)) - if err != nil { - return terror.Annotate(err, "decode upstream config key from etcd failed") - } - sourceID := kvs[0] - cfgStr := string(kv.Value) - cfg := config.NewMysqlConfig() - err = cfg.Parse(cfgStr) - if err != nil { - log.L().Error("cannot parse config", zap.String("source", sourceID), zap.Error(err)) - continue - } - c.sourceConfigs[sourceID] = *cfg - c.schedule(sourceID) - log.L().Info("load config successful", zap.String("source", sourceID), zap.String("config", cfgStr)) - } - - resp, err = etcdClient.Get(ectx, common.UpstreamBoundWorkerKeyAdapter.Path(), clientv3.WithPrefix()) - if err != nil { - return nil - } - - for _, kv := range resp.Kvs { - kvs, err := common.UpstreamBoundWorkerKeyAdapter.Decode(string(kv.Key)) - if err != nil { - return terror.Annotate(err, "decode upstream bound worker key from etcd failed") - } - addr := kvs[0] - sourceID := string(kv.Value) - w, ok := c.workers[addr] - if !ok { - log.L().Error("worker not exist but binding relationship exist", zap.String("addr", addr), zap.String("source", sourceID)) - continue - } - gresp, err := etcdClient.Get(ectx, common.UpstreamConfigKeyAdapter.Encode(sourceID)) - if err != nil || len(gresp.Kvs) == 0 { - log.L().Error("cannot load config", zap.String("addr", addr), zap.String("source", sourceID), zap.Error(err)) - continue - } - cfgStr := string(gresp.Kvs[0].Value) - c.upstreams[sourceID] = w - c.workerToSource[addr] = sourceID - log.L().Info("load config successful", zap.String("source", sourceID), zap.String("config", cfgStr)) - } - - c.started = true - c.ctx, c.cancel = context.WithCancel(ctx) - c.wg.Add(1) - go func() { - defer c.wg.Done() - c.ObserveWorkers() - }() - log.L().Info("coordinator is started") - return nil -} - -// IsStarted checks if the coordinator is started. -func (c *Coordinator) IsStarted() bool { - c.mu.RLock() - defer c.mu.RUnlock() - return c.started -} - -// Stop stops the coordinator. -func (c *Coordinator) Stop() { - c.mu.Lock() - c.cancel() - c.started = false - c.mu.Unlock() - - c.wg.Wait() - log.L().Info("coordinator is stoped") -} - -// RemoveWorker removes the dm-worker to the coordinate. -func (c *Coordinator) RemoveWorker(address string) { - c.mu.Lock() - defer c.mu.Unlock() - delete(c.workers, address) -} - -// AddWorker add the dm-worker to the coordinate. -func (c *Coordinator) AddWorker(name string, address string, cli workerrpc.Client) { - c.mu.Lock() - defer c.mu.Unlock() - if w, ok := c.workers[address]; ok { - w.SetStatus(WorkerFree) - return - } - w := NewWorker(name, address, cli) - c.workers[address] = w -} - -// HandleStartedWorker change worker status when mysql task started -func (c *Coordinator) HandleStartedWorker(w *Worker, cfg *config.MysqlConfig, succ bool) { - c.mu.Lock() - defer c.mu.Unlock() - if succ { - c.upstreams[cfg.SourceID] = w - c.workerToSource[w.Address()] = cfg.SourceID - c.sourceConfigs[cfg.SourceID] = *cfg - } else { - w.SetStatus(WorkerFree) - } - delete(c.pendingReqSources, cfg.SourceID) -} - -// HandleStoppedWorker change worker status when mysql task stopped -func (c *Coordinator) HandleStoppedWorker(w *Worker, cfg *config.MysqlConfig) bool { - c.mu.Lock() - defer c.mu.Unlock() - delete(c.sourceConfigs, cfg.SourceID) - delete(c.upstreams, cfg.SourceID) - delete(c.workerToSource, w.Address()) - w.SetStatus(WorkerFree) - return true -} - -// AcquireWorkerForSource get the free worker to create mysql delay task, and add it to pending task -// to avoid create a task in two worker -func (c *Coordinator) AcquireWorkerForSource(source string) (*Worker, error) { - c.mu.Lock() - defer c.mu.Unlock() - if c.started == false { - return nil, terror.ErrMasterCoordinatorNotStart - } - if addr, ok := c.pendingReqSources[source]; ok { - return nil, terror.ErrMasterAcquireWorkerFailed.Generatef("the same source has been started in worker: %s", addr) - } - if _, ok := c.sourceConfigs[source]; ok { - // this check is used to avoid a situation: one task is started twice by mistake but requires two workers - // If ok is true, there are two situations: - // 1. this task is mistakenly started twice, when coordinator tried to operate on the bound worker it will report an error - // 2. this task is paused because the bound worker was out of service before, we can give this task this worker to start it again - // If ok is false, that means the try on the bound worker has failed, we can arrange this task another worker - // ATTENTION!!! This mechanism can't prevent this case, which should be discussed later: - // the task is being operating to a worker(sourceConfigs and upstreams haven't been updated), but it is started again to acquire worker - if w, ok := c.upstreams[source]; ok { - return w, nil - } - } - for _, w := range c.workers { - if w.status.Load() == WorkerFree { - // we bound worker to avoid another task trying to get it - w.status.Store(WorkerBound) - c.pendingReqSources[source] = w.Address() - return w, nil - } - } - return nil, terror.ErrMasterAcquireWorkerFailed.Generate("no free worker could start mysql task") -} - -// GetAllWorkers gets all workers. -func (c *Coordinator) GetAllWorkers() map[string]*Worker { - c.mu.RLock() - defer c.mu.RUnlock() - return c.workers -} - -// GetRunningMysqlSource gets all souce which is running. -func (c *Coordinator) GetRunningMysqlSource() map[string]*Worker { - c.mu.RLock() - defer c.mu.RUnlock() - res := make(map[string]*Worker) - for source, w := range c.upstreams { - if w.State() == WorkerBound { - res[source] = w - } - } - return res -} - -// GetWorkerBySourceID gets the worker through source id. -func (c *Coordinator) GetWorkerBySourceID(source string) *Worker { - c.mu.RLock() - defer c.mu.RUnlock() - return c.upstreams[source] -} - -// GetWorkerByAddress gets the worker through addr. -func (c *Coordinator) GetWorkerByAddress(addr string) *Worker { - c.mu.RLock() - defer c.mu.RUnlock() - return c.workers[addr] -} - -// GetConfigBySourceID gets db config through source id. -func (c *Coordinator) GetConfigBySourceID(source string) *config.MysqlConfig { - c.mu.RLock() - defer c.mu.RUnlock() - if cfg, ok := c.sourceConfigs[source]; ok { - return &cfg - } - return nil -} - -// GetWorkersByStatus gets the workers match the specified status. -func (c *Coordinator) GetWorkersByStatus(s WorkerState) []*Worker { - c.mu.RLock() - defer c.mu.RUnlock() - res := make([]*Worker, 0, len(c.workers)) - for _, w := range c.workers { - if w.State() == s { - res = append(res, w) - } - } - return res -} - -// ObserveWorkers observe the keepalive path and maintain the status of the worker. -func (c *Coordinator) ObserveWorkers() { - watcher := clientv3.NewWatcher(c.etcdCli) - ch := watcher.Watch(c.ctx, common.WorkerKeepAliveKeyAdapter.Path(), clientv3.WithPrefix()) - t1 := time.NewTicker(time.Second * 6) - for { - select { - case wresp := <-ch: - if wresp.Canceled { - log.L().Error("leader watcher is canceled with", zap.Error(wresp.Err())) - return - } - - for _, ev := range wresp.Events { - switch ev.Type { - case mvccpb.PUT: - log.L().Info("putkv", zap.String("kv", string(ev.Kv.Key))) - kvs, err := common.WorkerKeepAliveKeyAdapter.Decode(string(ev.Kv.Key)) - if err != nil { - log.L().Warn("coordinator decode worker keep alive key from etcd failed", zap.String("key", string(ev.Kv.Key)), zap.Error(err)) - continue - } - addr, name := kvs[0], kvs[1] - c.mu.Lock() - if w, ok := c.workers[addr]; ok && name == w.Name() { - state := "Free" - if source, ok := c.workerToSource[addr]; ok { - // The worker connect to master before we transfer mysqltask into another worker, - // try schedule MySQL-task. - if nowWorker, ok := c.upstreams[source]; ok && nowWorker.Address() == addr { - // If the MySQL-task is still running in this worker. - c.schedule(source) - w.SetStatus(WorkerBound) - state = "bound" - } else if _, ok := c.upstreams[source]; !ok { - // If the MySQL-task has not been assigned to others, It could try to schedule on self. - c.upstreams[source] = w - w.SetStatus(WorkerBound) - c.schedule(source) - state = "bound" - } else { - delete(c.workerToSource, addr) - w.SetStatus(WorkerFree) - state = "free" - } - } else { - // If this worker has not been in 'workerToSource', it means that this worker must have lose connect from master more than 6s, - // so the mysql task in worker had stop - w.SetStatus(WorkerFree) - } - log.L().Info("worker became online ", zap.String("name", w.Name()), zap.String("address", w.Address()), zap.String("state", state)) - } else { - // TODO: how to deal with unregister worker - } - c.mu.Unlock() - case mvccpb.DELETE: - log.L().Info("deletekv", zap.String("kv", string(ev.Kv.Key))) - kvs, err := common.WorkerKeepAliveKeyAdapter.Decode(string(ev.Kv.Key)) - if err != nil { - log.L().Warn("coordinator decode worker keep alive key from etcd failed", zap.String("key", string(ev.Kv.Key)), zap.Error(err)) - continue - } - addr, name := kvs[0], kvs[1] - c.mu.Lock() - if w, ok := c.workers[addr]; ok && name == w.Name() { - log.L().Info("worker became offline, state: closed", zap.String("name", w.Name()), zap.String("address", w.Address())) - // Set client nil, and send request use new request - w.client = nil - if source, ok := c.workerToSource[addr]; ok { - c.schedule(source) - } - } - c.mu.Unlock() - } - } - case <-c.ctx.Done(): - log.L().Info("coordinate exict due to context canceled") - return - case <-t1.C: - c.tryRestartMysqlTask() - } - } -} - -func (c *Coordinator) schedule(source string) { - c.waitingTask <- source -} - -func (c *Coordinator) tryRestartMysqlTask() { - c.mu.RLock() - defer c.mu.RUnlock() - scheduleNextLoop := make([]string, 0) - hasTaskToSchedule := true - for hasTaskToSchedule { - select { - case source := <-c.waitingTask: - log.L().Info("will schedule source", zap.String("source", source)) - if cfg, ok := c.sourceConfigs[source]; ok { - ret := false - if w, ok := c.upstreams[source]; ok { - // Try start mysql task at the same worker. - c.mu.RUnlock() - log.L().Info("try start mysql task at the same worker", zap.String("worker", w.Address())) - ret = c.restartMysqlTask(w, &cfg) - c.mu.RLock() - } else { - c.mu.RUnlock() - w, err := c.AcquireWorkerForSource(source) - if err != nil { - log.L().Error("acquire worker for source", zap.String("source", source), zap.Error(err)) - } else { - if w != nil { - ret = c.restartMysqlTask(w, &cfg) - } else { - log.L().Info("acquire worker for source get nil worker") - } - } - c.mu.RLock() - } - if !ret { - scheduleNextLoop = append(scheduleNextLoop, source) - } - } - default: - hasTaskToSchedule = false - break - } - } - - for _, source := range scheduleNextLoop { - c.waitingTask <- source - } -} - -func (c *Coordinator) restartMysqlTask(w *Worker, cfg *config.MysqlConfig) bool { - log.L().Info("try to schedule ", zap.String("source", cfg.SourceID), zap.String("address", w.Address())) - task, err := cfg.Toml() - req := &pb.MysqlWorkerRequest{ - Op: pb.WorkerOp_StartWorker, - Config: task, - } - resp, err := w.OperateMysqlWorker(context.Background(), req, restartMysqlWorkerTimeout) - ret := false - c.mu.Lock() - if err == nil { - ret = resp.Result - if resp.Result { - c.workerToSource[w.Address()] = cfg.SourceID - c.upstreams[cfg.SourceID] = w - w.SetStatus(WorkerBound) - } else { - log.L().Warn("restartMysqlTask failed", zap.String("error", resp.Msg)) - delete(c.upstreams, cfg.SourceID) - if source, ok := c.workerToSource[w.Address()]; ok { - if source == cfg.SourceID { - delete(c.workerToSource, w.Address()) - w.SetStatus(WorkerFree) - } else { - // There may be another MySQL-task having been assigned to this worker. - log.L().Warn("schedule start-task to a running-worker", zap.String("address", w.Address()), - zap.String("running-source", source), zap.String("schedule-source", cfg.SourceID)) - } - } else { - w.SetStatus(WorkerFree) - } - } - } else { - // Error means there is something wrong about network, set worker to close. - // remove sourceID from upstreams. So the source would be schedule in other worker. - log.L().Warn("operate mysql worker", zap.Error(err), zap.Stringer("request", req)) - delete(c.upstreams, cfg.SourceID) - delete(c.workerToSource, w.Address()) - w.SetStatus(WorkerClosed) - } - delete(c.pendingReqSources, cfg.SourceID) - c.mu.Unlock() - if w.State() == WorkerClosed { - ectx, cancel := context.WithTimeout(c.etcdCli.Ctx(), etcdTimeout) - defer cancel() - resp, err := c.etcdCli.Get(ectx, common.WorkerRegisterKeyAdapter.Encode(w.Address(), w.Name())) - if err != nil { - if resp.Count > 0 { - w.SetStatus(WorkerFree) - } - } - } - return ret -} diff --git a/dm/master/coordinator/worker.go b/dm/master/coordinator/worker.go deleted file mode 100644 index b9c9342c96..0000000000 --- a/dm/master/coordinator/worker.go +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright 2019 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package coordinator - -import ( - "context" - "fmt" - "github.com/pingcap/dm/dm/pb" - "sync/atomic" - "time" - - "github.com/pingcap/dm/dm/master/workerrpc" -) - -// WorkerState the status of the worker -type WorkerState int - -// the status of worker -const ( - WorkerClosed WorkerState = iota + 1 - WorkerFree - WorkerBound -) - -// Worker the proc essor that let upstream and downstream synchronization. -type Worker struct { - name string - address string - client workerrpc.Client - status atomic.Value -} - -// NewWorker creates a worker with specified name and address. -func NewWorker(name, address string, cli workerrpc.Client) *Worker { - w := &Worker{ - name: name, - address: address, - client: cli, - } - w.status.Store(WorkerClosed) - return w -} - -// String formats the worker. -func (w *Worker) String() string { - return fmt.Sprintf("%s address:%s", w.name, w.address) -} - -func (w *Worker) getClient() (workerrpc.Client, error) { - if w.client == nil { - client, err := workerrpc.NewGRPCClient(w.address) - if err != nil { - return nil, err - } - w.client = client - } - return w.client, nil -} - -// Name returns the name of the worker. -func (w *Worker) Name() string { - return w.name -} - -// Address returns the address of the worker. -func (w *Worker) Address() string { - return w.address -} - -// State returns the state of the worker. -func (w *Worker) State() WorkerState { - // TODO: add more jugement. - return w.status.Load().(WorkerState) -} - -// SetStatus change the status of worker -func (w *Worker) SetStatus(s WorkerState) { - w.status.Store(s) -} - -// OperateMysqlWorker in a idle worker -func (w *Worker) OperateMysqlWorker(ctx context.Context, req *pb.MysqlWorkerRequest, d time.Duration) (*pb.MysqlWorkerResponse, error) { - ownerReq := &workerrpc.Request{ - Type: workerrpc.CmdOperateMysqlTask, - MysqlTask: req, - } - cli, err := w.getClient() - if err != nil { - return nil, err - } - resp, err := cli.SendRequest(ctx, ownerReq, d) - if err != nil { - return nil, err - } - return resp.MysqlTask, err -} - -// SendRequest by client -func (w *Worker) SendRequest(ctx context.Context, req *workerrpc.Request, d time.Duration) (*workerrpc.Response, error) { - cli, err := w.getClient() - if err != nil { - return nil, err - } - return cli.SendRequest(ctx, req, d) -} diff --git a/dm/master/election.go b/dm/master/election.go index aee05986ad..825a77c447 100644 --- a/dm/master/election.go +++ b/dm/master/election.go @@ -15,6 +15,7 @@ package master import ( "context" + "go.uber.org/zap" "time" @@ -38,12 +39,9 @@ func (s *Server) electionNotify(ctx context.Context) { switch notify { case election.IsLeader: log.L().Info("current member become the leader", zap.String("current member", s.cfg.Name)) - err := s.coordinator.Start(ctx, s.etcdClient) + err := s.scheduler.Start(ctx, s.etcdClient) if err != nil { - log.L().Error("coordinator do not started", zap.Error(err)) - } - if err = s.recoverSubTask(); err != nil { - log.L().Error("recover subtask infos from coordinator fail", zap.Error(err)) + log.L().Error("scheduler do not started", zap.Error(err)) } s.Lock() @@ -52,7 +50,7 @@ func (s *Server) electionNotify(ctx context.Context) { s.Unlock() case election.RetireFromLeader, election.IsNotLeader: if notify == election.RetireFromLeader { - s.coordinator.Stop() + s.scheduler.Close() } leader, leaderID, leaderAddr, err2 := s.election.LeaderInfo(ctx) diff --git a/dm/master/scheduler/scheduler.go b/dm/master/scheduler/scheduler.go new file mode 100644 index 0000000000..dddc56d4ef --- /dev/null +++ b/dm/master/scheduler/scheduler.go @@ -0,0 +1,1180 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package scheduler + +import ( + "context" + "sort" + "sync" + + "go.etcd.io/etcd/clientv3" + "go.uber.org/zap" + + "github.com/pingcap/dm/dm/config" + "github.com/pingcap/dm/dm/master/workerrpc" + "github.com/pingcap/dm/dm/pb" + "github.com/pingcap/dm/pkg/ha" + "github.com/pingcap/dm/pkg/log" + "github.com/pingcap/dm/pkg/terror" +) + +// Scheduler schedules tasks for DM-worker instances, including: +// - register/unregister DM-worker instances. +// - observe the online/offline status of DM-worker instances. +// - observe add/remove operations for upstream sources' config. +// - schedule upstream sources to DM-worker instances. +// - schedule data migration subtask operations. +// - holds agents of DM-worker instances. +// NOTE: the DM-master server MUST wait for this scheduler become started before handling client requests. +// Cases trigger a source-to-worker bound try: +// - a worker from Offline to Free: +// - receive keep-alive. +// - a worker from Bound to Free: +// - trigger by unbound: `a source removed`. +// - a new source added: +// - add source request from user. +// - a source unbound from another worker: +// - trigger by unbound: `a worker from Bound to Offline`. +// - TODO(csuzhangxc): design a strategy to ensure the old worker already shutdown its work. +// Cases trigger a source-to-worker unbound try. +// - a worker from Bound to Offline: +// - lost keep-alive. +// - a source removed: +// - remove source request from user. +// TODO: try to handle the return `err` of etcd operations, +// because may put into etcd, but the response to the etcd client interrupted. +type Scheduler struct { + mu sync.RWMutex + + logger log.Logger + + started bool // whether the scheduler already started for work. + cancel context.CancelFunc + wg sync.WaitGroup + + etcdCli *clientv3.Client + + // all source configs, source ID -> source config. + // add: + // - add source by user request (calling `AddSourceCfg`). + // - recover from etcd (calling `recoverSources`). + // delete: + // - remove source by user request (calling `RemoveSourceCfg`). + sourceCfgs map[string]config.SourceConfig + + // all subtask configs, task name -> source ID -> subtask config. + // add: + // - add/start subtask by user request (calling `AddSubTasks`). + // - recover from etcd (calling `recoverSubTasks`). + // delete: + // - remove/stop subtask by user request (calling `RemoveSubTasks`). + subTaskCfgs map[string]map[string]config.SubTaskConfig + + // all DM-workers, worker name -> worker. + // add: + // - add worker by user request (calling `AddWorker`). + // - recover from etcd (calling `recoverWorkersBounds`). + // delete: + // - remove worker by user request (calling `RemoveWorker`). + workers map[string]*Worker + + // all bound relationship, source ID -> worker. + // add: + // - when bounding a source to a worker. + // delete: + // - when unbounding a source from a worker. + // see `Cases trigger a source-to-worker bound try` above. + bounds map[string]*Worker + + // unbound (pending to bound) sources. + // NOTE: refactor to support scheduling by priority. + // add: + // - add source by user request (calling `AddSourceCfg`). + // - recover from etcd (calling `recoverWorkersBounds`). + // - when the bounding worker become offline. + // delete: + // - remove source by user request (calling `RemoveSourceCfg`). + // - when bounded the source to a worker. + unbounds map[string]struct{} + + // expectant relay stages for sources, source ID -> stage. + // add: + // - bound the source to a worker (at first time). + // - recover from etcd (calling `recoverSources`). + // update: + // - update stage by user request (calling `UpdateExpectRelayStage`). + // delete: + // - remove source by user request (calling `RemoveSourceCfg`). + expectRelayStages map[string]ha.Stage + + // expectant subtask stages for tasks & sources, task name -> source ID -> stage. + // add: + // - add/start subtask by user request (calling `AddSubTasks`). + // - recover from etcd (calling `recoverSubTasks`). + // update: + // - update stage by user request (calling `UpdateExpectSubTaskStage`). + // delete: + // - remove/stop subtask by user request (calling `RemoveSubTasks`). + expectSubTaskStages map[string]map[string]ha.Stage +} + +// NewScheduler creates a new scheduler instance. +func NewScheduler(pLogger *log.Logger) *Scheduler { + return &Scheduler{ + logger: pLogger.WithFields(zap.String("component", "scheduler")), + sourceCfgs: make(map[string]config.SourceConfig), + subTaskCfgs: make(map[string]map[string]config.SubTaskConfig), + workers: make(map[string]*Worker), + bounds: make(map[string]*Worker), + unbounds: make(map[string]struct{}), + expectRelayStages: make(map[string]ha.Stage), + expectSubTaskStages: make(map[string]map[string]ha.Stage), + } +} + +// Start starts the scheduler for work. +func (s *Scheduler) Start(pCtx context.Context, etcdCli *clientv3.Client) error { + s.logger.Info("the scheduler is starting") + + s.mu.Lock() + defer s.mu.Unlock() + + if s.started { + return terror.ErrSchedulerStarted.Generate() + } + + s.reset() // reset previous status. + + // recover previous status from etcd. + err := s.recoverSources(etcdCli) + if err != nil { + return err + } + err = s.recoverSubTasks(etcdCli) + if err != nil { + return err + } + rev, err := s.recoverWorkersBounds(etcdCli) + if err != nil { + return err + } + + ctx, cancel := context.WithCancel(pCtx) + + // starting to observe status of DM-worker instances. + workerEvCh := make(chan ha.WorkerEvent, 10) + workerErrCh := make(chan error, 10) + s.wg.Add(2) + go func() { + defer func() { + s.wg.Done() + close(workerEvCh) + close(workerErrCh) + }() + ha.WatchWorkerEvent(ctx, etcdCli, rev+1, workerEvCh, workerErrCh) + }() + go func() { + defer s.wg.Done() + s.handleWorkerEv(ctx, workerEvCh, workerErrCh) + }() + + s.started = true // started now + s.cancel = cancel + s.etcdCli = etcdCli + s.logger.Info("the scheduler has started") + return nil +} + +// Close closes the scheduler. +func (s *Scheduler) Close() { + s.mu.Lock() + + if !s.started { + s.mu.Unlock() + return + } + + s.logger.Info("the scheduler is closing") + if s.cancel != nil { + s.cancel() + s.cancel = nil + } + s.mu.Unlock() + + // need to wait for goroutines to return which may hold the mutex. + s.wg.Wait() + + s.mu.Lock() + defer s.mu.Unlock() + s.started = false // closed now. + s.logger.Info("the scheduler has closed") +} + +// AddSourceCfg adds the upstream source config to the cluster. +// NOTE: please verify the config before call this. +func (s *Scheduler) AddSourceCfg(cfg config.SourceConfig) error { + s.mu.Lock() + defer s.mu.Unlock() + + if !s.started { + return terror.ErrSchedulerNotStarted.Generate() + } + + // 1. check whether exists. + if _, ok := s.sourceCfgs[cfg.SourceID]; ok { + return terror.ErrSchedulerSourceCfgExist.Generate(cfg.SourceID) + } + + // 2. put the config into etcd. + _, err := ha.PutSourceCfg(s.etcdCli, cfg) + if err != nil { + return err + } + + // 3. record the config in the scheduler. + s.sourceCfgs[cfg.SourceID] = cfg + + // 4. try to bound it to a Free worker. + bounded, err := s.tryBoundForSource(cfg.SourceID) + if err != nil { + return err + } else if !bounded { + // 5. record the source as unbounded. + s.unbounds[cfg.SourceID] = struct{}{} + } + return nil +} + +// RemoveSourceCfg removes the upstream source config in the cluster. +// when removing the upstream source config, it should also remove: +// - any existing relay stage. +// - any source-worker bound relationship. +func (s *Scheduler) RemoveSourceCfg(source string) error { + s.mu.Lock() + defer s.mu.Unlock() + + if !s.started { + return terror.ErrSchedulerNotStarted.Generate() + } + + // 1. check whether the config exists. + _, ok := s.sourceCfgs[source] + if !ok { + return terror.ErrSchedulerSourceCfgNotExist.Generate(source) + } + + // 2. check whether any subtask exists for the source. + existingSubtasksM := make(map[string]struct{}) + for task, cfg := range s.subTaskCfgs { + for source2 := range cfg { + if source2 == source { + existingSubtasksM[task] = struct{}{} + } + } + } + existingSubtasks := strMapToSlice(existingSubtasksM) + if len(existingSubtasks) > 0 { + return terror.ErrSchedulerSourceOpTaskExist.Generate(source, existingSubtasks) + } + + // 3. find worker name by source ID. + var ( + workerName string // empty should be fine below. + worker *Worker + ) + if w, ok := s.bounds[source]; ok { + worker = w + workerName = w.BaseInfo().Name + } + + // 4. delete the info in etcd. + _, err := ha.DeleteSourceCfgRelayStageSourceBound(s.etcdCli, source, workerName) + if err != nil { + return err + } + + // 5. delete the config and expectant stage in the scheduler + delete(s.sourceCfgs, source) + delete(s.expectRelayStages, source) + + // 6. unbound for the source. + s.updateStatusForUnbound(source) + + // 7. remove it from unbounds. + delete(s.unbounds, source) + + // 8. try to bound the worker for another source. + if worker != nil { + _, err = s.tryBoundForWorker(worker) + if err != nil { + return err + } + } + return nil +} + +// GetSourceCfgByID gets source config by source ID. +func (s *Scheduler) GetSourceCfgByID(source string) *config.SourceConfig { + s.mu.RLock() + defer s.mu.RUnlock() + cfg, ok := s.sourceCfgs[source] + if !ok { + return nil + } + clone := cfg + return &clone +} + +// AddSubTasks adds the information of one or more subtasks for one task. +func (s *Scheduler) AddSubTasks(cfgs ...config.SubTaskConfig) error { + s.mu.Lock() + defer s.mu.Unlock() + + if !s.started { + return terror.ErrSchedulerNotStarted.Generate() + } + + if len(cfgs) == 0 { + return nil // no subtasks need to add, this should not happen. + } + + // 1. check whether exists. + var ( + taskNamesM = make(map[string]struct{}, 1) + existSourcesM = make(map[string]struct{}, len(cfgs)) + ) + for _, cfg := range cfgs { + taskNamesM[cfg.Name] = struct{}{} + cfgM, ok := s.subTaskCfgs[cfg.Name] + if !ok { + continue + } + _, ok = cfgM[cfg.SourceID] + if !ok { + continue + } + existSourcesM[cfg.SourceID] = struct{}{} + } + taskNames := strMapToSlice(taskNamesM) + existSources := strMapToSlice(existSourcesM) + if len(taskNames) > 1 { + // only subtasks from one task supported now. + return terror.ErrSchedulerMultiTask.Generate(taskNames) + } else if len(existSources) == len(cfgs) { + // all subtasks already exist, return an error. + return terror.ErrSchedulerSubTaskExist.Generate(taskNames[0], existSources) + } else if len(existSources) > 0 { + // some subtasks already exists, log a warn. + s.logger.Warn("some subtasks already exist", zap.String("task", taskNames[0]), zap.Strings("sources", existSources)) + } + + // 2. construct `Running` stages when adding. + newCfgs := make([]config.SubTaskConfig, 0, len(cfgs)-len(existSources)) + newStages := make([]ha.Stage, 0, cap(newCfgs)) + unbounds := make([]string, 0) + for _, cfg := range cfgs { + if _, ok := existSourcesM[cfg.SourceID]; ok { + continue + } + newCfgs = append(newCfgs, cfg) + newStages = append(newStages, ha.NewSubTaskStage(pb.Stage_Running, cfg.SourceID, cfg.Name)) + if _, ok := s.bounds[cfg.SourceID]; !ok { + unbounds = append(unbounds, cfg.SourceID) + } + } + + // 3. check whether any sources unbound. + if len(unbounds) > 0 { + return terror.ErrSchedulerSourcesUnbound.Generate(unbounds) + } + + // 4. put the configs and stages into etcd. + _, err := ha.PutSubTaskCfgStage(s.etcdCli, newCfgs, newStages) + if err != nil { + return err + } + + // 5. record the config and the expectant stage. + for _, cfg := range newCfgs { + if _, ok := s.subTaskCfgs[cfg.Name]; !ok { + s.subTaskCfgs[cfg.Name] = make(map[string]config.SubTaskConfig) + } + s.subTaskCfgs[cfg.Name][cfg.SourceID] = cfg + } + for _, stage := range newStages { + if _, ok := s.expectSubTaskStages[stage.Task]; !ok { + s.expectSubTaskStages[stage.Task] = make(map[string]ha.Stage) + } + s.expectSubTaskStages[stage.Task][stage.Source] = stage + } + + return nil +} + +// RemoveSubTasks removes the information of one or more subtaks for one task. +func (s *Scheduler) RemoveSubTasks(task string, sources ...string) error { + s.mu.Lock() + defer s.mu.Unlock() + + if !s.started { + return terror.ErrSchedulerNotStarted.Generate() + } + + if task == "" || len(sources) == 0 { + return nil // no subtask need to stop, this should not happen. + } + + // 1. check the task exists. + stagesM, ok1 := s.expectSubTaskStages[task] + cfgsM, ok2 := s.subTaskCfgs[task] + if !ok1 || !ok2 { + return terror.ErrSchedulerSubTaskOpTaskNotExist.Generate(task) + } + + var ( + notExistSourcesM = make(map[string]struct{}) + stages = make([]ha.Stage, 0, len(sources)) + cfgs = make([]config.SubTaskConfig, 0, len(sources)) + ) + for _, source := range sources { + if stage, ok := stagesM[source]; !ok { + notExistSourcesM[source] = struct{}{} + } else { + stages = append(stages, stage) + } + if cfg, ok := cfgsM[source]; ok { + cfgs = append(cfgs, cfg) + } + } + notExistSources := strMapToSlice(notExistSourcesM) + if len(notExistSources) > 0 { + // some sources not exist, reject the request. + return terror.ErrSchedulerSubTaskOpSourceNotExist.Generate(notExistSources) + } + + // 2. delete the configs and the stages. + _, err := ha.DeleteSubTaskCfgStage(s.etcdCli, cfgs, stages) + if err != nil { + return err + } + + // 3. clear the config and the expectant stage. + for _, cfg := range cfgs { + delete(s.subTaskCfgs[task], cfg.SourceID) + } + if len(s.subTaskCfgs[task]) == 0 { + delete(s.subTaskCfgs, task) + } + for _, stage := range stages { + delete(s.expectSubTaskStages[task], stage.Source) + } + if len(s.expectSubTaskStages[task]) == 0 { + delete(s.expectSubTaskStages, task) + } + + return nil +} + +// GetSubTaskCfgByTaskSource gets subtask config by task name and source ID. +func (s *Scheduler) GetSubTaskCfgByTaskSource(task, source string) *config.SubTaskConfig { + s.mu.RLock() + defer s.mu.RUnlock() + cfgM, ok := s.subTaskCfgs[task] + if !ok { + return nil + } + cfg, ok := cfgM[source] + if !ok { + return nil + } + clone := cfg + return &clone +} + +// GetSubTaskCfgsByTask gets subtask configs' map by task name. +func (s *Scheduler) GetSubTaskCfgsByTask(task string) map[string]*config.SubTaskConfig { + s.mu.RLock() + defer s.mu.RUnlock() + cfgM, ok := s.subTaskCfgs[task] + if !ok { + return nil + } + cloneM := make(map[string]*config.SubTaskConfig, len(cfgM)) + for source, cfg := range cfgM { + clone := cfg + cloneM[source] = &clone + } + return cloneM +} + +// AddWorker adds the information of the DM-worker when registering a new instance. +// This only adds the information of the DM-worker, +// in order to know whether it's online (ready to handle works), +// we need to wait for its healthy status through keep-alive. +func (s *Scheduler) AddWorker(name, addr string) error { + s.mu.Lock() + defer s.mu.Unlock() + + if !s.started { + return terror.ErrSchedulerNotStarted.Generate() + } + + // 1. check whether exists. + if w, ok := s.workers[name]; ok { + // NOTE: we do not support add the worker with different address now, support if needed later. + // but we support add the worker with all the same information multiple times, and only the first one take effect, + // because this is needed when restarting the worker. + if addr == w.BaseInfo().Addr { + s.logger.Warn("add the same worker again", zap.Stringer("worker info", w.BaseInfo())) + return nil + } + return terror.ErrSchedulerWorkerExist.Generate(w.BaseInfo()) + } + + // 2. put the base info into etcd. + info := ha.NewWorkerInfo(name, addr) + _, err := ha.PutWorkerInfo(s.etcdCli, info) + if err != nil { + return err + } + + // generate an agent of DM-worker (with Offline stage) and keep it in the scheduler. + _, err = s.recordWorker(info) + return err +} + +// RemoveWorker removes the information of the DM-worker when removing the instance manually. +// The user should shutdown the DM-worker instance before removing its information. +func (s *Scheduler) RemoveWorker(name string) error { + s.mu.Lock() + defer s.mu.Unlock() + + if !s.started { + return terror.ErrSchedulerNotStarted.Generate() + } + + w, ok := s.workers[name] + if !ok { + return terror.ErrSchedulerWorkerNotExist.Generate(name) + } else if w.Stage() != WorkerOffline { + return terror.ErrSchedulerWorkerOnline.Generate() + } + + // delete the info in etcd. + _, err := ha.DeleteWorkerInfo(s.etcdCli, name) + if err != nil { + return err + } + s.deleteWorker(name) + return nil +} + +// GetWorkerByName gets worker agent by worker name. +func (s *Scheduler) GetWorkerByName(name string) *Worker { + s.mu.RLock() + defer s.mu.RUnlock() + return s.workers[name] +} + +// GetWorkerBySource gets the current bound worker agent by source ID, +// returns nil if the source not bound. +func (s *Scheduler) GetWorkerBySource(source string) *Worker { + s.mu.RLock() + defer s.mu.RUnlock() + return s.bounds[source] +} + +// BoundSources returns all bound source IDs in increasing order. +func (s *Scheduler) BoundSources() []string { + s.mu.RLock() + defer s.mu.RUnlock() + IDs := make([]string, 0, len(s.bounds)) + for ID := range s.bounds { + IDs = append(IDs, ID) + } + sort.Strings(IDs) + return IDs +} + +// UnboundSources returns all unbound source IDs in increasing order. +func (s *Scheduler) UnboundSources() []string { + s.mu.RLock() + defer s.mu.RUnlock() + IDs := make([]string, 0, len(s.unbounds)) + for ID := range s.unbounds { + IDs = append(IDs, ID) + } + sort.Strings(IDs) + return IDs +} + +// UpdateExpectRelayStage updates the current expect relay stage. +// now, only support updates: +// - from `Running` to `Paused`. +// - from `Paused` to `Running`. +// NOTE: from `Running` to `Running` and `Paused` to `Paused` still update the data in etcd, +// because some user may want to update `{Running, Paused, ...}` to `{Running, Running, ...}`. +// so, this should be also supported in DM-worker. +func (s *Scheduler) UpdateExpectRelayStage(newStage pb.Stage, sources ...string) error { + s.mu.Lock() + defer s.mu.Unlock() + + if !s.started { + return terror.ErrSchedulerNotStarted.Generate() + } + + if len(sources) == 0 { + return nil // no sources need to update the stage, this should not happen. + } + + // 1. check the new expectant stage. + switch newStage { + case pb.Stage_Running, pb.Stage_Paused: + default: + return terror.ErrSchedulerRelayStageInvalidUpdate.Generate(newStage) + } + + var ( + notExistSourcesM = make(map[string]struct{}) + currStagesM = make(map[string]struct{}) + stages = make([]ha.Stage, 0, len(sources)) + ) + for _, source := range sources { + if currStage, ok := s.expectRelayStages[source]; !ok { + notExistSourcesM[source] = struct{}{} + } else { + currStagesM[currStage.Expect.String()] = struct{}{} + } + stages = append(stages, ha.NewRelayStage(newStage, source)) + } + notExistSources := strMapToSlice(notExistSourcesM) + currStages := strMapToSlice(currStagesM) + if len(notExistSources) > 0 { + // some sources not exist, reject the request. + return terror.ErrSchedulerRelayStageSourceNotExist.Generate(notExistSources) + } else if len(currStages) > 1 { + // more than one current relay stage exist, but need to update to the same one, log a warn. + s.logger.Warn("update more than one current expectant relay stage to the same one", + zap.Strings("from", currStages), zap.Stringer("to", newStage)) + } + + // 2. put the stages into etcd. + _, err := ha.PutRelayStage(s.etcdCli, stages...) + if err != nil { + return err + } + + // 3. update the stages in the scheduler. + for _, stage := range stages { + s.expectRelayStages[stage.Source] = stage + } + + return nil +} + +// GetExpectRelayStage returns the current expect relay stage. +// If the stage not exists, an invalid stage is returned. +// This func is used for testing. +func (s *Scheduler) GetExpectRelayStage(source string) ha.Stage { + s.mu.RLock() + defer s.mu.RUnlock() + if stage, ok := s.expectRelayStages[source]; ok { + return stage + } + return ha.NewRelayStage(pb.Stage_InvalidStage, source) +} + +// UpdateExpectSubTaskStage updates the current expect subtask stage. +// now, only support updates: +// - from `Running` to `Paused`. +// - from `Paused` to `Running`. +// NOTE: from `Running` to `Running` and `Paused` to `Paused` still update the data in etcd, +// because some user may want to update `{Running, Paused, ...}` to `{Running, Running, ...}`. +// so, this should be also supported in DM-worker. +func (s *Scheduler) UpdateExpectSubTaskStage(newStage pb.Stage, task string, sources ...string) error { + s.mu.Lock() + defer s.mu.Unlock() + + if !s.started { + return terror.ErrSchedulerNotStarted.Generate() + } + + if task == "" || len(sources) == 0 { + return nil // no subtask need to update, this should not happen. + } + + // 1. check the new expectant stage. + switch newStage { + case pb.Stage_Running, pb.Stage_Paused: + default: + return terror.ErrSchedulerSubTaskStageInvalidUpdate.Generate(newStage) + } + + // 2. check the task exists. + stagesM, ok := s.expectSubTaskStages[task] + if !ok { + return terror.ErrSchedulerSubTaskOpTaskNotExist.Generate(task) + } + + var ( + notExistSourcesM = make(map[string]struct{}) + currStagesM = make(map[string]struct{}) + stages = make([]ha.Stage, 0, len(sources)) + ) + for _, source := range sources { + if currStage, ok := stagesM[source]; !ok { + notExistSourcesM[source] = struct{}{} + } else { + currStagesM[currStage.Expect.String()] = struct{}{} + } + stages = append(stages, ha.NewSubTaskStage(newStage, source, task)) + } + notExistSources := strMapToSlice(notExistSourcesM) + currStages := strMapToSlice(currStagesM) + if len(notExistSources) > 0 { + // some sources not exist, reject the request. + return terror.ErrSchedulerSubTaskOpSourceNotExist.Generate(notExistSources) + } else if len(currStages) > 1 { + // more than one current subtask stage exist, but need to update to the same one, log a warn. + s.logger.Warn("update more than one current expectant subtask stage to the same one", + zap.Strings("from", currStages), zap.Stringer("to", newStage)) + } + + // 3. put the stages into etcd. + _, err := ha.PutSubTaskStage(s.etcdCli, stages...) + if err != nil { + return err + } + + // 4. update the stages in the scheduler. + for _, stage := range stages { + s.expectSubTaskStages[task][stage.Source] = stage + } + + return nil +} + +// GetExpectSubTaskStage returns the current expect subtask stage. +// If the stage not exists, an invalid stage is returned. +// This func is used for testing. +func (s *Scheduler) GetExpectSubTaskStage(task, source string) ha.Stage { + s.mu.RLock() + defer s.mu.RUnlock() + invalidStage := ha.NewSubTaskStage(pb.Stage_InvalidStage, source, task) + stageM, ok := s.expectSubTaskStages[task] + if !ok { + return invalidStage + } + stage, ok := stageM[source] + if !ok { + return invalidStage + } + return stage +} + +// recoverSourceCfgs recovers history source configs and expectant relay stages from etcd. +func (s *Scheduler) recoverSources(cli *clientv3.Client) error { + // get all source configs. + cfgM, _, err := ha.GetAllSourceCfg(cli) + if err != nil { + return err + } + // get all relay stages. + stageM, _, err := ha.GetAllRelayStage(cli) + if err != nil { + return err + } + + // recover in-memory data. + for source, cfg := range cfgM { + s.sourceCfgs[source] = cfg + } + for source, stage := range stageM { + s.expectRelayStages[source] = stage + } + + return nil +} + +// recoverSubTasks recovers history subtask configs and expectant subtask stages from etcd. +func (s *Scheduler) recoverSubTasks(cli *clientv3.Client) error { + // get all subtask configs. + cfgMM, _, err := ha.GetAllSubTaskCfg(cli) + if err != nil { + return err + } + // get all subtask stages. + stageMM, _, err := ha.GetAllSubTaskStage(cli) + if err != nil { + return nil + } + + // recover in-memory data. + for source, cfgM := range cfgMM { + for task, cfg := range cfgM { + if _, ok := s.subTaskCfgs[task]; !ok { + s.subTaskCfgs[task] = make(map[string]config.SubTaskConfig) + } + s.subTaskCfgs[task][source] = cfg + } + } + for source, stageM := range stageMM { + for task, stage := range stageM { + if _, ok := s.expectSubTaskStages[task]; !ok { + s.expectSubTaskStages[task] = make(map[string]ha.Stage) + } + s.expectSubTaskStages[task][source] = stage + } + } + + return nil +} + +// recoverWorkersBounds recovers history DM-worker info and status from etcd. +// and it also recovers the bound/unbound relationship. +func (s *Scheduler) recoverWorkersBounds(cli *clientv3.Client) (int64, error) { + // 1. get all history base info. + // it should no new DM-worker registered between this call and the below `GetKeepAliveWorkers`, + // because no DM-master leader are handling DM-worker register requests. + wim, _, err := ha.GetAllWorkerInfo(cli) + if err != nil { + return 0, err + } + + // 2. get all history bound relationships. + // it should no new bound relationship added between this call and the below `GetKeepAliveWorkers`, + // because no DM-master leader are doing the scheduler. + // TODO(csuzhangxc): handle the case where the bound relationship exists, but the base info not exists. + sbm, _, err := ha.GetSourceBound(cli, "") + if err != nil { + return 0, err + } + + // 3. get all history offline status. + kam, rev, err := ha.GetKeepAliveWorkers(cli) + if err != nil { + return 0, err + } + + // 4. recover DM-worker info and status. + for name, info := range wim { + // create and record the worker agent. + w, err2 := s.recordWorker(info) + if err2 != nil { + return 0, err2 + } + // set the stage as Free if it's keep alive. + if _, ok := kam[name]; ok { + w.ToFree() + } + // set the stage as Bound and record the bound relationship if exists. + if bound, ok := sbm[name]; ok { + err2 = s.updateStatusForBound(w, bound) + if err2 != nil { + return 0, err2 + } + } + } + + // 5. recover bounds/unbounds, all sources which not in bounds should be in unbounds. + for source := range s.sourceCfgs { + if _, ok := s.bounds[source]; !ok { + s.unbounds[source] = struct{}{} + } + } + + return rev, nil +} + +// handleWorkerEv handles the online/offline status change event of DM-worker instances. +func (s *Scheduler) handleWorkerEv(ctx context.Context, evCh <-chan ha.WorkerEvent, errCh <-chan error) { + for { + select { + case <-ctx.Done(): + return + case ev, ok := <-evCh: + if !ok { + return + } + s.logger.Info("receive worker status change event", zap.Bool("delete", ev.IsDeleted), zap.Stringer("event", ev)) + var err error + if ev.IsDeleted { + err = s.handleWorkerOffline(ev) + } else { + err = s.handleWorkerOnline(ev) + } + if err != nil { + // TODO(csuzhangxc): report the error through metrics or other methods. + s.logger.Error("fail to handle worker status change event", zap.Bool("delete", ev.IsDeleted), zap.Stringer("event", ev), zap.Error(err)) + } + case err, ok := <-errCh: + if !ok { + return + } + // TODO(csuzhangxc): we only log the `err` here, but we should update metrics and do more works for it later. + s.logger.Error("receive error when watching worker status change event", zap.Error(err)) + } + } +} + +// handleWorkerOnline handles the scheduler when a DM-worker become online. +// This should try to bound an unbounded source to it. +// NOTE: this func need to hold the mutex. +func (s *Scheduler) handleWorkerOnline(ev ha.WorkerEvent) error { + s.mu.Lock() + defer s.mu.Unlock() + + // 1. find the worker. + w, ok := s.workers[ev.WorkerName] + if !ok { + s.logger.Warn("worker for the event not exists", zap.Stringer("event", ev)) + return nil + } + + // 2. check whether is bound. + if w.Stage() == WorkerBound { + // TODO: When dm-worker keepalive is broken, it will turn off its own running source + // After keepalive is restored, this dm-worker should continue to run the previously bound source + // So we PutSourceBound here to trigger dm-worker to get this event and start source again. + // If this worker still start a source, it doesn't matter. dm-worker will omit same source and reject source with different name + s.logger.Warn("worker already bound", zap.Stringer("bound", w.Bound())) + _, err := ha.PutSourceBound(s.etcdCli, w.Bound()) + return err + } + + // 3. change the stage (from Offline) to Free. + w.ToFree() + + // 4. try to bound an unbounded source. + _, err := s.tryBoundForWorker(w) + return err +} + +// handleWorkerOffline handles the scheduler when a DM-worker become offline. +// This should unbound any previous bounded source. +// NOTE: this func need to hold the mutex. +func (s *Scheduler) handleWorkerOffline(ev ha.WorkerEvent) error { + s.mu.Lock() + defer s.mu.Unlock() + + // 1. find the worker. + w, ok := s.workers[ev.WorkerName] + if !ok { + s.logger.Warn("worker for the event not exists", zap.Stringer("event", ev)) + return nil + } + + // 2. find the bound relationship. + bound := w.Bound() + + // 3. check whether bound before. + if bound.Source == "" { + // 3.1. change the stage (from Free) to Offline. + w.ToOffline() + s.logger.Info("worker not bound, no need to unbound", zap.Stringer("event", ev)) + return nil + } + + // 4. delete the bound relationship in etcd. + _, err := ha.DeleteSourceBound(s.etcdCli, bound.Worker) + if err != nil { + return err + } + + // 5. unbound for the source. + s.updateStatusForUnbound(bound.Source) + + // 6. change the stage (from Free) to Offline. + w.ToOffline() + + s.logger.Info("unbound the worker for source", zap.Stringer("bound", bound), zap.Stringer("event", ev)) + + // 7. try to bound the source to a Free worker again. + bounded, err := s.tryBoundForSource(bound.Source) + if err != nil { + return err + } else if !bounded { + // 8. record the source as unbounded. + s.unbounds[bound.Source] = struct{}{} + } + + return nil +} + +// tryBoundForWorker tries to bound a random unbounded source to the worker. +// returns (true, nil) after bounded. +func (s *Scheduler) tryBoundForWorker(w *Worker) (bounded bool, err error) { + // 1. check whether any unbound source exists. + var source string + for source = range s.unbounds { + break // got a source. + } + if source == "" { + s.logger.Info("no unbound sources need to bound", zap.Stringer("worker", w.BaseInfo())) + return false, nil + } + + // 2. pop a source to bound, priority supported if needed later. + // DO NOT forget to push it back if fail to bound. + delete(s.unbounds, source) + defer func() { + if err != nil { + // push the source back. + s.unbounds[source] = struct{}{} + } + }() + + // 3. try to bound them. + err = s.boundSourceToWorker(source, w) + if err != nil { + return false, err + } + return true, nil +} + +// tryBoundForSource tries to bound a source to a random Free worker. +// returns (true, nil) after bounded. +func (s *Scheduler) tryBoundForSource(source string) (bool, error) { + // 1. try to find a random Free worker. + var worker *Worker + for _, w := range s.workers { + if w.Stage() == WorkerFree { + worker = w + break + } + } + if worker == nil { + s.logger.Info("no free worker exists for bound", zap.String("source", source)) + return false, nil + } + + // 2. try to bound them. + err := s.boundSourceToWorker(source, worker) + if err != nil { + return false, err + } + return true, nil +} + +// boundSourceToWorker bounds the source and worker together. +// we should check the bound relationship of the source and the stage of the worker in the caller. +func (s *Scheduler) boundSourceToWorker(source string, w *Worker) error { + // 1. put the bound relationship into etcd. + var err error + bound := ha.NewSourceBound(source, w.BaseInfo().Name) + if _, ok := s.expectRelayStages[source]; ok { + // the relay stage exists before, only put the bound relationship. + _, err = ha.PutSourceBound(s.etcdCli, bound) + } else { + // no relay stage exists before, create a `Runnng` stage and put it with the bound relationship. + stage := ha.NewRelayStage(pb.Stage_Running, source) + _, err = ha.PutRelayStageSourceBound(s.etcdCli, stage, bound) + defer func() { + if err == nil { + // 1.1 if no error exist when returning, record the stage. + s.expectRelayStages[source] = stage + } + }() + } + if err != nil { + return err + } + + // 2. update the bound relationship in the scheduler. + err = s.updateStatusForBound(w, bound) + if err != nil { + return err + } + + s.logger.Info("bound the source to worker", zap.Stringer("bound", bound)) + return nil +} + +// recordWorker creates the worker agent (with Offline stage) and records in the scheduler. +// this func is used when adding a new worker. +// NOTE: trigger scheduler when the worker become online, not when added. +func (s *Scheduler) recordWorker(info ha.WorkerInfo) (*Worker, error) { + w, err := NewWorker(info) + if err != nil { + return nil, err + } + s.workers[info.Name] = w + return w, nil +} + +// deleteWorker deletes the recorded worker and bound. +// this func is used when removing the worker. +// NOTE: trigger scheduler when the worker become offline, not when deleted. +func (s *Scheduler) deleteWorker(name string) { + w, ok := s.workers[name] + if !ok { + return + } + w.Close() + delete(s.workers, name) +} + +// updateStatusForBound updates the in-memory status for bound, including: +// - update the stage of worker to `Bound`. +// - record the bound relationship in the scheduler. +// this func is called after the bound relationship existed in etcd. +func (s *Scheduler) updateStatusForBound(w *Worker, b ha.SourceBound) error { + err := w.ToBound(b) + if err != nil { + return err + } + s.bounds[b.Source] = w + return nil +} + +// updateStatusForUnbound updates the in-memory status for unbound, including: +// - update the stage of worker to `Free`. +// - remove the bound relationship in the scheduler. +// this func is called after the bound relationship removed from etcd. +func (s *Scheduler) updateStatusForUnbound(source string) { + w, ok := s.bounds[source] + if !ok { + return + } + w.ToFree() + delete(s.bounds, source) +} + +// reset resets the internal status. +func (s *Scheduler) reset() { + s.sourceCfgs = make(map[string]config.SourceConfig) + s.subTaskCfgs = make(map[string]map[string]config.SubTaskConfig) + s.workers = make(map[string]*Worker) + s.bounds = make(map[string]*Worker) + s.unbounds = make(map[string]struct{}) + s.expectRelayStages = make(map[string]ha.Stage) + s.expectSubTaskStages = make(map[string]map[string]ha.Stage) +} + +// strMapToSlice converts a `map[string]struct{}` to `[]string` in increasing order. +func strMapToSlice(m map[string]struct{}) []string { + ret := make([]string, 0, len(m)) + for s := range m { + ret = append(ret, s) + } + sort.Strings(ret) + return ret +} + +// SetWorkerClientForTest sets mockWorkerClient for specified worker, only used for test +func (s *Scheduler) SetWorkerClientForTest(name string, mockCli workerrpc.Client) { + if _, ok := s.workers[name]; ok { + s.workers[name].cli = mockCli + } +} diff --git a/dm/master/scheduler/scheduler_test.go b/dm/master/scheduler/scheduler_test.go new file mode 100644 index 0000000000..0ec1b8d6e3 --- /dev/null +++ b/dm/master/scheduler/scheduler_test.go @@ -0,0 +1,614 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package scheduler + +import ( + "context" + "sync" + "testing" + "time" + + . "github.com/pingcap/check" + "go.etcd.io/etcd/clientv3" + "go.etcd.io/etcd/integration" + + "github.com/pingcap/dm/dm/config" + "github.com/pingcap/dm/dm/pb" + "github.com/pingcap/dm/pkg/ha" + "github.com/pingcap/dm/pkg/log" + "github.com/pingcap/dm/pkg/terror" + "github.com/pingcap/dm/pkg/utils" +) + +const ( + // do not forget to update this path if the file removed/renamed. + sourceSampleFile = "../../worker/source.toml" + // do not forget to update this path if the file removed/renamed. + subTaskSampleFile = "../../worker/subtask.toml" +) + +var ( + etcdTestCli *clientv3.Client +) + +func TestScheduler(t *testing.T) { + log.InitLogger(&log.Config{}) + + mockCluster := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + defer mockCluster.Terminate(t) + + etcdTestCli = mockCluster.RandClient() + + TestingT(t) +} + +// clear keys in etcd test cluster. +func clearTestInfoOperation(c *C) { + c.Assert(ha.ClearTestInfoOperation(etcdTestCli), IsNil) +} + +type testScheduler struct{} + +var _ = Suite(&testScheduler{}) + +var ( + sourceCfgEmpty config.SourceConfig + stageEmpty ha.Stage +) + +func (t *testScheduler) TestScheduler(c *C) { + defer clearTestInfoOperation(c) + + var ( + logger = log.L() + s = NewScheduler(&logger) + sourceID1 = "mysql-replica-1" + sourceID2 = "mysql-replica-2" + workerName1 = "dm-worker-1" + workerName2 = "dm-worker-2" + workerAddr1 = "127.0.0.1:8262" + workerAddr2 = "127.0.0.1:18262" + taskName1 = "task-1" + taskName2 = "task-2" + workerInfo1 = ha.NewWorkerInfo(workerName1, workerAddr1) + workerInfo2 = ha.NewWorkerInfo(workerName2, workerAddr2) + sourceCfg1 config.SourceConfig + subtaskCfg1 config.SubTaskConfig + keepAliveTTL = int64(1) // NOTE: this should be >= minLeaseTTL, in second. + ) + c.Assert(sourceCfg1.LoadFromFile(sourceSampleFile), IsNil) + sourceCfg1.SourceID = sourceID1 + sourceCfg2 := sourceCfg1 + sourceCfg2.SourceID = sourceID2 + + c.Assert(subtaskCfg1.DecodeFile(subTaskSampleFile), IsNil) + subtaskCfg1.SourceID = sourceID1 + subtaskCfg1.Name = taskName1 + c.Assert(subtaskCfg1.Adjust(), IsNil) + subtaskCfg21 := subtaskCfg1 + subtaskCfg21.Name = taskName2 + c.Assert(subtaskCfg21.Adjust(), IsNil) + subtaskCfg22 := subtaskCfg21 + subtaskCfg22.SourceID = sourceID2 + c.Assert(subtaskCfg22.Adjust(), IsNil) + + // not started scheduler can't do anything. + c.Assert(terror.ErrSchedulerNotStarted.Equal(s.AddSourceCfg(sourceCfg1)), IsTrue) + c.Assert(terror.ErrSchedulerNotStarted.Equal(s.RemoveSourceCfg(sourceID1)), IsTrue) + c.Assert(terror.ErrSchedulerNotStarted.Equal(s.AddSubTasks(subtaskCfg1)), IsTrue) + c.Assert(terror.ErrSchedulerNotStarted.Equal(s.RemoveSubTasks(taskName1, sourceID1)), IsTrue) + c.Assert(terror.ErrSchedulerNotStarted.Equal(s.AddWorker(workerName1, workerAddr1)), IsTrue) + c.Assert(terror.ErrSchedulerNotStarted.Equal(s.RemoveWorker(workerName1)), IsTrue) + c.Assert(terror.ErrSchedulerNotStarted.Equal(s.UpdateExpectRelayStage(pb.Stage_Running, sourceID1)), IsTrue) + c.Assert(terror.ErrSchedulerNotStarted.Equal(s.UpdateExpectSubTaskStage(pb.Stage_Running, taskName1, sourceID1)), IsTrue) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // CASE 1: start without any previous info. + c.Assert(s.Start(ctx, etcdTestCli), IsNil) + c.Assert(terror.ErrSchedulerStarted.Equal(s.Start(ctx, etcdTestCli)), IsTrue) // start multiple times. + s.Close() + s.Close() // close multiple times. + + // CASE 2: start again without any previous info. + c.Assert(s.Start(ctx, etcdTestCli), IsNil) + + // CASE 2.1: add the first source config. + // no source config exist before added. + t.sourceCfgNotExist(c, s, sourceID1) + // add source config1. + c.Assert(s.AddSourceCfg(sourceCfg1), IsNil) + c.Assert(terror.ErrSchedulerSourceCfgExist.Equal(s.AddSourceCfg(sourceCfg1)), IsTrue) // can't add multiple times. + // the source config added. + t.sourceCfgExist(c, s, sourceCfg1) + // one unbound source exist (because no free worker). + t.sourceBounds(c, s, []string{}, []string{sourceID1}) + + // CASE 2.2: add the first worker. + // no worker exist before added. + t.workerNotExist(c, s, workerName1) + // add worker1. + c.Assert(s.AddWorker(workerName1, workerAddr1), IsNil) + c.Assert(terror.ErrSchedulerWorkerExist.Equal(s.AddWorker(workerName1, workerAddr2)), IsTrue) // can't add with different address now. + c.Assert(s.AddWorker(workerName1, workerAddr1), IsNil) // but can add the worker multiple times (like restart the worker). + // the worker added. + t.workerExist(c, s, workerInfo1) + t.workerOffline(c, s, workerName1) + // still no bounds (because the worker is offline). + t.sourceBounds(c, s, []string{}, []string{sourceID1}) + // no expect relay stage exist (because the source has never been bounded). + t.relayStageMatch(c, s, sourceID1, pb.Stage_InvalidStage) + + // CASE 2.3: the worker become online. + // do keep-alive for worker1. + ctx1, cancel1 := context.WithCancel(ctx) + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + c.Assert(ha.KeepAlive(ctx1, etcdTestCli, workerName1, keepAliveTTL), IsNil) + }() + // wait for source1 bound to worker1. + utils.WaitSomething(30, 10*time.Millisecond, func() bool { + bounds := s.BoundSources() + return len(bounds) == 1 && bounds[0] == sourceID1 + }) + t.sourceBounds(c, s, []string{sourceID1}, []string{}) + t.workerBound(c, s, ha.NewSourceBound(sourceID1, workerName1)) + // expect relay stage become Running after the first bound. + t.relayStageMatch(c, s, sourceID1, pb.Stage_Running) + + // CASE 2.4: pause the relay. + c.Assert(s.UpdateExpectRelayStage(pb.Stage_Paused, sourceID1), IsNil) + t.relayStageMatch(c, s, sourceID1, pb.Stage_Paused) + // update relay stage without source take no effect now (and return without error). + c.Assert(s.UpdateExpectRelayStage(pb.Stage_Running), IsNil) + t.relayStageMatch(c, s, sourceID1, pb.Stage_Paused) + // update to non-(Running, Paused) stage is invalid. + c.Assert(terror.ErrSchedulerRelayStageInvalidUpdate.Equal(s.UpdateExpectRelayStage(pb.Stage_InvalidStage, sourceID1)), IsTrue) + c.Assert(terror.ErrSchedulerRelayStageInvalidUpdate.Equal(s.UpdateExpectRelayStage(pb.Stage_New, sourceID1)), IsTrue) + c.Assert(terror.ErrSchedulerRelayStageInvalidUpdate.Equal(s.UpdateExpectRelayStage(pb.Stage_Stopped, sourceID1)), IsTrue) + c.Assert(terror.ErrSchedulerRelayStageInvalidUpdate.Equal(s.UpdateExpectRelayStage(pb.Stage_Finished, sourceID1)), IsTrue) + // can't update stage with not existing sources now. + c.Assert(terror.ErrSchedulerRelayStageSourceNotExist.Equal(s.UpdateExpectRelayStage(pb.Stage_Running, sourceID1, sourceID2)), IsTrue) + t.relayStageMatch(c, s, sourceID1, pb.Stage_Paused) + + // CASE 2.5: resume the relay. + c.Assert(s.UpdateExpectRelayStage(pb.Stage_Running, sourceID1), IsNil) + t.relayStageMatch(c, s, sourceID1, pb.Stage_Running) + + // CASE 2.6: start a task with only one source. + // no subtask config exists before start. + c.Assert(s.AddSubTasks(), IsNil) // can call without configs, return without error, but take no effect. + t.subTaskCfgNotExist(c, s, taskName1, sourceID1) + t.subTaskStageMatch(c, s, taskName1, sourceID1, pb.Stage_InvalidStage) + // start the task. + c.Assert(s.AddSubTasks(subtaskCfg1), IsNil) + c.Assert(terror.ErrSchedulerSubTaskExist.Equal(s.AddSubTasks(subtaskCfg1)), IsTrue) // add again. + // subtask config and stage exist. + t.subTaskCfgExist(c, s, subtaskCfg1) + t.subTaskStageMatch(c, s, taskName1, sourceID1, pb.Stage_Running) + + // try start a task with two sources, some sources not bound. + c.Assert(terror.ErrSchedulerSourcesUnbound.Equal(s.AddSubTasks(subtaskCfg21, subtaskCfg22)), IsTrue) + t.subTaskCfgNotExist(c, s, taskName2, sourceID1) + t.subTaskStageMatch(c, s, taskName2, sourceID1, pb.Stage_InvalidStage) + t.subTaskCfgNotExist(c, s, taskName2, sourceID2) + t.subTaskStageMatch(c, s, taskName2, sourceID2, pb.Stage_InvalidStage) + + // CASE 2.7: pause/resume task1. + c.Assert(s.UpdateExpectSubTaskStage(pb.Stage_Paused, taskName1, sourceID1), IsNil) + t.subTaskStageMatch(c, s, taskName1, sourceID1, pb.Stage_Paused) + c.Assert(s.UpdateExpectSubTaskStage(pb.Stage_Running, taskName1, sourceID1), IsNil) + t.subTaskStageMatch(c, s, taskName1, sourceID1, pb.Stage_Running) + // update subtask stage without source or task take no effect now (and return without error). + c.Assert(s.UpdateExpectSubTaskStage(pb.Stage_Paused, "", sourceID1), IsNil) + c.Assert(s.UpdateExpectSubTaskStage(pb.Stage_Paused, taskName1), IsNil) + t.relayStageMatch(c, s, sourceID1, pb.Stage_Running) + // update to non-(Running, Paused) stage is invalid. + c.Assert(terror.ErrSchedulerSubTaskStageInvalidUpdate.Equal(s.UpdateExpectSubTaskStage(pb.Stage_InvalidStage, taskName1, sourceID1)), IsTrue) + c.Assert(terror.ErrSchedulerSubTaskStageInvalidUpdate.Equal(s.UpdateExpectSubTaskStage(pb.Stage_New, taskName1, sourceID1)), IsTrue) + c.Assert(terror.ErrSchedulerSubTaskStageInvalidUpdate.Equal(s.UpdateExpectSubTaskStage(pb.Stage_Stopped, taskName1, sourceID1)), IsTrue) + c.Assert(terror.ErrSchedulerSubTaskStageInvalidUpdate.Equal(s.UpdateExpectSubTaskStage(pb.Stage_Finished, taskName1, sourceID1)), IsTrue) + // can't update stage with not existing sources now. + c.Assert(terror.ErrSchedulerSubTaskOpSourceNotExist.Equal(s.UpdateExpectSubTaskStage(pb.Stage_Paused, taskName1, sourceID1, sourceID2)), IsTrue) + t.relayStageMatch(c, s, sourceID1, pb.Stage_Running) + + // CASE 2.8: worker1 become offline. + // cancel keep-alive. + cancel1() + wg.Wait() + // wait for source1 unbound from worker1. + utils.WaitSomething(int(3*keepAliveTTL), time.Second, func() bool { + unbounds := s.UnboundSources() + return len(unbounds) == 1 && unbounds[0] == sourceID1 + }) + t.sourceBounds(c, s, []string{}, []string{sourceID1}) + // static information are still there. + t.sourceCfgExist(c, s, sourceCfg1) + t.subTaskCfgExist(c, s, subtaskCfg1) + t.workerExist(c, s, workerInfo1) + // worker1 still exists, but it's offline. + t.workerOffline(c, s, workerName1) + // expect relay stage keep Running. + t.relayStageMatch(c, s, sourceID1, pb.Stage_Running) + t.subTaskStageMatch(c, s, taskName1, sourceID1, pb.Stage_Running) + + // shutdown the scheduler. + s.Close() + + // CASE 3: start again with previous `Offline` worker, relay stage, subtask stage. + c.Assert(s.Start(ctx, etcdTestCli), IsNil) + + // CASE 3.1: previous information should recover. + // source1 is still unbound. + t.sourceBounds(c, s, []string{}, []string{sourceID1}) + // worker1 still exists, but it's offline. + t.workerOffline(c, s, workerName1) + // static information are still there. + t.sourceCfgExist(c, s, sourceCfg1) + t.subTaskCfgExist(c, s, subtaskCfg1) + t.workerExist(c, s, workerInfo1) + // expect relay stage keep Running. + t.relayStageMatch(c, s, sourceID1, pb.Stage_Running) + t.subTaskStageMatch(c, s, taskName1, sourceID1, pb.Stage_Running) + + // CASE 3.2: start worker1 again. + // do keep-alive for worker1 again. + ctx1, cancel1 = context.WithCancel(ctx) + wg.Add(1) + go func() { + defer wg.Done() + c.Assert(ha.KeepAlive(ctx1, etcdTestCli, workerName1, keepAliveTTL), IsNil) + }() + // wait for source1 bound to worker1. + utils.WaitSomething(30, 10*time.Millisecond, func() bool { + bounds := s.BoundSources() + return len(bounds) == 1 && bounds[0] == sourceID1 + }) + // source1 bound to worker1. + t.sourceBounds(c, s, []string{sourceID1}, []string{}) + t.workerBound(c, s, ha.NewSourceBound(sourceID1, workerName1)) + // expect stages keep Running. + t.relayStageMatch(c, s, sourceID1, pb.Stage_Running) + t.subTaskStageMatch(c, s, taskName1, sourceID1, pb.Stage_Running) + + // shutdown the scheduler. + s.Close() + + // CASE 4: start again with previous `Bound` worker, relay stage, subtask stage. + c.Assert(s.Start(ctx, etcdTestCli), IsNil) + + // CASE 4.1: previous information should recover. + // source1 is still bound. + t.sourceBounds(c, s, []string{sourceID1}, []string{}) + // worker1 still exists, and it's bound. + t.workerBound(c, s, ha.NewSourceBound(sourceID1, workerName1)) + // static information are still there. + t.sourceCfgExist(c, s, sourceCfg1) + t.subTaskCfgExist(c, s, subtaskCfg1) + t.workerExist(c, s, workerInfo1) + // expect stages keep Running. + t.relayStageMatch(c, s, sourceID1, pb.Stage_Running) + t.subTaskStageMatch(c, s, taskName1, sourceID1, pb.Stage_Running) + + // CASE 4.2: add another worker into the cluster. + // worker2 not exists before added. + t.workerNotExist(c, s, workerName2) + // add worker2. + c.Assert(s.AddWorker(workerName2, workerAddr2), IsNil) + // the worker added, but is offline. + t.workerExist(c, s, workerInfo2) + t.workerOffline(c, s, workerName2) + + // CASE 4.3: the worker2 become online. + // do keep-alive for worker2. + ctx2, cancel2 := context.WithCancel(ctx) + wg.Add(1) + go func() { + defer wg.Done() + c.Assert(ha.KeepAlive(ctx2, etcdTestCli, workerName2, keepAliveTTL), IsNil) + }() + // wait for worker2 become Free. + utils.WaitSomething(30, 10*time.Millisecond, func() bool { + w := s.GetWorkerByName(workerName2) + return w.Stage() == WorkerFree + }) + t.workerFree(c, s, workerName2) + + // CASE 4.4: add source config2. + // source2 not exists before. + t.sourceCfgNotExist(c, s, sourceID2) + // add source2. + c.Assert(s.AddSourceCfg(sourceCfg2), IsNil) + // source2 added. + t.sourceCfgExist(c, s, sourceCfg2) + // source2 should bound to worker2. + t.workerBound(c, s, ha.NewSourceBound(sourceID2, workerName2)) + t.sourceBounds(c, s, []string{sourceID1, sourceID2}, []string{}) + t.relayStageMatch(c, s, sourceID2, pb.Stage_Running) + + // CASE 4.4: start a task with two sources. + // can't add more than one tasks at a time now. + c.Assert(terror.ErrSchedulerMultiTask.Equal(s.AddSubTasks(subtaskCfg1, subtaskCfg21)), IsTrue) + // task2' config and stage not exists before. + t.subTaskCfgNotExist(c, s, taskName2, sourceID1) + t.subTaskCfgNotExist(c, s, taskName2, sourceID2) + t.subTaskStageMatch(c, s, taskName2, sourceID1, pb.Stage_InvalidStage) + t.subTaskStageMatch(c, s, taskName2, sourceID2, pb.Stage_InvalidStage) + // start task2. + c.Assert(s.AddSubTasks(subtaskCfg21, subtaskCfg22), IsNil) + // config added, stage become Running. + t.subTaskCfgExist(c, s, subtaskCfg21) + t.subTaskCfgExist(c, s, subtaskCfg22) + t.subTaskStageMatch(c, s, taskName2, sourceID1, pb.Stage_Running) + t.subTaskStageMatch(c, s, taskName2, sourceID2, pb.Stage_Running) + + // CASE 4.4.1 fail to stop any task. + // can call without tasks or sources, return without error, but take no effect. + c.Assert(s.RemoveSubTasks("", sourceID1), IsNil) + c.Assert(s.RemoveSubTasks(taskName1), IsNil) + // stop not exist task. + c.Assert(terror.ErrSchedulerSubTaskOpTaskNotExist.Equal(s.RemoveSubTasks("not-exist", sourceID1)), IsTrue) + // config and stage not changed. + t.subTaskCfgExist(c, s, subtaskCfg21) + t.subTaskCfgExist(c, s, subtaskCfg22) + t.subTaskStageMatch(c, s, taskName2, sourceID1, pb.Stage_Running) + t.subTaskStageMatch(c, s, taskName2, sourceID2, pb.Stage_Running) + + // CASE 4.5: update subtasks stage from different current stage. + // pause . + c.Assert(s.UpdateExpectSubTaskStage(pb.Stage_Paused, taskName2, sourceID1), IsNil) + t.subTaskStageMatch(c, s, taskName2, sourceID1, pb.Stage_Paused) + t.subTaskStageMatch(c, s, taskName2, sourceID2, pb.Stage_Running) + // resume . + c.Assert(s.UpdateExpectSubTaskStage(pb.Stage_Running, taskName2, sourceID1, sourceID2), IsNil) + t.subTaskStageMatch(c, s, taskName2, sourceID1, pb.Stage_Running) + t.subTaskStageMatch(c, s, taskName2, sourceID2, pb.Stage_Running) + + // CASE 4.6: try remove source when subtasks exist. + c.Assert(terror.ErrSchedulerSourceOpTaskExist.Equal(s.RemoveSourceCfg(sourceID2)), IsTrue) + // source2 keep there. + t.sourceCfgExist(c, s, sourceCfg2) + // source2 still bound to worker2. + t.workerBound(c, s, ha.NewSourceBound(sourceID2, workerName2)) + t.sourceBounds(c, s, []string{sourceID1, sourceID2}, []string{}) + t.relayStageMatch(c, s, sourceID2, pb.Stage_Running) + + // CASE 4.7: stop task2. + c.Assert(s.RemoveSubTasks(taskName2, sourceID1, sourceID2), IsNil) + t.subTaskCfgNotExist(c, s, taskName2, sourceID1) + t.subTaskCfgNotExist(c, s, taskName2, sourceID2) + t.subTaskStageMatch(c, s, taskName2, sourceID1, pb.Stage_InvalidStage) + t.subTaskStageMatch(c, s, taskName2, sourceID2, pb.Stage_InvalidStage) + + // CASE 4.7: remove source2. + c.Assert(s.RemoveSourceCfg(sourceID2), IsNil) + c.Assert(terror.ErrSchedulerSourceCfgNotExist.Equal(s.RemoveSourceCfg(sourceID2)), IsTrue) // already removed. + // source2 removed. + t.sourceCfgNotExist(c, s, sourceID2) + // worker2 become Free now. + t.workerFree(c, s, workerName2) + t.sourceBounds(c, s, []string{sourceID1}, []string{}) + t.relayStageMatch(c, s, sourceID2, pb.Stage_InvalidStage) + + // CASE 4.8: worker1 become offline. + // before shutdown, worker1 bound source + t.workerBound(c, s, ha.NewSourceBound(sourceID1, workerName1)) + // cancel keep-alive. + cancel1() + // wait for worker1 become offline. + utils.WaitSomething(int(3*keepAliveTTL), time.Second, func() bool { + w := s.GetWorkerByName(workerName1) + c.Assert(w, NotNil) + return w.Stage() == WorkerOffline + }) + t.workerOffline(c, s, workerName1) + // source1 should bound to worker2. + t.sourceBounds(c, s, []string{sourceID1}, []string{}) + t.workerBound(c, s, ha.NewSourceBound(sourceID1, workerName2)) + // expect stages keep Running. + t.relayStageMatch(c, s, sourceID1, pb.Stage_Running) + t.subTaskStageMatch(c, s, taskName1, sourceID1, pb.Stage_Running) + + // CASE 4.9: remove worker1. + c.Assert(s.RemoveWorker(workerName1), IsNil) + c.Assert(terror.ErrSchedulerWorkerNotExist.Equal(s.RemoveWorker(workerName1)), IsTrue) // can't remove multiple times. + // worker1 not exists now. + t.workerNotExist(c, s, workerName1) + + // CASE 4.10: stop task1. + c.Assert(s.RemoveSubTasks(taskName1, sourceID1), IsNil) + t.subTaskCfgNotExist(c, s, taskName1, sourceID1) + t.subTaskStageMatch(c, s, taskName1, sourceID1, pb.Stage_InvalidStage) + + // CASE 4.11: remove worker not supported when the worker is online. + c.Assert(terror.ErrSchedulerWorkerOnline.Equal(s.RemoveWorker(workerName2)), IsTrue) + t.sourceBounds(c, s, []string{sourceID1}, []string{}) + t.workerBound(c, s, ha.NewSourceBound(sourceID1, workerName2)) + t.relayStageMatch(c, s, sourceID1, pb.Stage_Running) + + // CASE 4.12: worker2 become offline. + cancel2() + wg.Wait() + // wait for worker2 become offline. + utils.WaitSomething(int(3*keepAliveTTL), time.Second, func() bool { + w := s.GetWorkerByName(workerName2) + c.Assert(w, NotNil) + return w.Stage() == WorkerOffline + }) + t.workerOffline(c, s, workerName2) + // source1 should unbound + t.sourceBounds(c, s, []string{}, []string{sourceID1}) + // expect stages keep Running. + t.relayStageMatch(c, s, sourceID1, pb.Stage_Running) + + // CASE 4.13: remove worker2. + c.Assert(s.RemoveWorker(workerName2), IsNil) + t.workerNotExist(c, s, workerName2) + // relay stage still there. + t.sourceBounds(c, s, []string{}, []string{sourceID1}) + t.relayStageMatch(c, s, sourceID1, pb.Stage_Running) + + // CASE 4.14: remove source1. + c.Assert(s.RemoveSourceCfg(sourceID1), IsNil) + t.sourceCfgNotExist(c, s, sourceID1) + t.sourceBounds(c, s, []string{}, []string{}) + t.relayStageMatch(c, s, sourceID1, pb.Stage_InvalidStage) +} + +func (t *testScheduler) sourceCfgNotExist(c *C, s *Scheduler, source string) { + c.Assert(s.GetSourceCfgByID(source), IsNil) + cfg, _, err := ha.GetSourceCfg(etcdTestCli, source, 0) + c.Assert(err, IsNil) + c.Assert(cfg, DeepEquals, sourceCfgEmpty) +} + +func (t *testScheduler) sourceCfgExist(c *C, s *Scheduler, expectCfg config.SourceConfig) { + cfgP := s.GetSourceCfgByID(expectCfg.SourceID) + c.Assert(cfgP, DeepEquals, &expectCfg) + cfgV, _, err := ha.GetSourceCfg(etcdTestCli, expectCfg.SourceID, 0) + c.Assert(err, IsNil) + c.Assert(cfgV, DeepEquals, expectCfg) +} + +func (t *testScheduler) subTaskCfgNotExist(c *C, s *Scheduler, task, source string) { + c.Assert(s.GetSubTaskCfgByTaskSource(task, source), IsNil) + cfgM, _, err := ha.GetSubTaskCfg(etcdTestCli, source, task, 0) + c.Assert(err, IsNil) + c.Assert(cfgM, HasLen, 0) +} + +func (t *testScheduler) subTaskCfgExist(c *C, s *Scheduler, expectCfg config.SubTaskConfig) { + cfgP := s.GetSubTaskCfgByTaskSource(expectCfg.Name, expectCfg.SourceID) + c.Assert(cfgP, DeepEquals, &expectCfg) + cfgM, _, err := ha.GetSubTaskCfg(etcdTestCli, expectCfg.SourceID, expectCfg.Name, 0) + c.Assert(err, IsNil) + c.Assert(cfgM, HasLen, 1) + c.Assert(cfgM[expectCfg.Name], DeepEquals, expectCfg) +} + +func (t *testScheduler) workerNotExist(c *C, s *Scheduler, worker string) { + c.Assert(s.GetWorkerByName(worker), IsNil) + wm, _, err := ha.GetAllWorkerInfo(etcdTestCli) + c.Assert(err, IsNil) + _, ok := wm[worker] + c.Assert(ok, IsFalse) +} + +func (t *testScheduler) workerExist(c *C, s *Scheduler, info ha.WorkerInfo) { + c.Assert(s.GetWorkerByName(info.Name), NotNil) + c.Assert(s.GetWorkerByName(info.Name).BaseInfo(), DeepEquals, info) + wm, _, err := ha.GetAllWorkerInfo(etcdTestCli) + c.Assert(err, IsNil) + c.Assert(wm[info.Name], DeepEquals, info) +} + +func (t *testScheduler) workerOffline(c *C, s *Scheduler, worker string) { + w := s.GetWorkerByName(worker) + c.Assert(w, NotNil) + c.Assert(w.Bound(), DeepEquals, nullBound) + c.Assert(w.Stage(), Equals, WorkerOffline) + wm, _, err := ha.GetAllWorkerInfo(etcdTestCli) + c.Assert(err, IsNil) + _, ok := wm[worker] + c.Assert(ok, IsTrue) + sbm, _, err := ha.GetSourceBound(etcdTestCli, worker) + c.Assert(err, IsNil) + _, ok = sbm[worker] + c.Assert(ok, IsFalse) +} + +func (t *testScheduler) workerFree(c *C, s *Scheduler, worker string) { + w := s.GetWorkerByName(worker) + c.Assert(w, NotNil) + c.Assert(w.Bound(), DeepEquals, nullBound) + c.Assert(w.Stage(), Equals, WorkerFree) + wm, _, err := ha.GetAllWorkerInfo(etcdTestCli) + c.Assert(err, IsNil) + _, ok := wm[worker] + c.Assert(ok, IsTrue) + sbm, _, err := ha.GetSourceBound(etcdTestCli, worker) + c.Assert(err, IsNil) + _, ok = sbm[worker] + c.Assert(ok, IsFalse) +} + +func (t *testScheduler) workerBound(c *C, s *Scheduler, bound ha.SourceBound) { + w := s.GetWorkerByName(bound.Worker) + c.Assert(w, NotNil) + c.Assert(w.Bound(), DeepEquals, bound) + c.Assert(w.Stage(), Equals, WorkerBound) + wm, _, err := ha.GetAllWorkerInfo(etcdTestCli) + c.Assert(err, IsNil) + _, ok := wm[bound.Worker] + c.Assert(ok, IsTrue) + sbm, _, err := ha.GetSourceBound(etcdTestCli, bound.Worker) + c.Assert(err, IsNil) + c.Assert(sbm[bound.Worker], DeepEquals, bound) +} + +func (t *testScheduler) sourceBounds(c *C, s *Scheduler, expectBounds, expectUnbounds []string) { + c.Assert(s.BoundSources(), DeepEquals, expectBounds) + c.Assert(s.UnboundSources(), DeepEquals, expectUnbounds) + + wToB, _, err := ha.GetSourceBound(etcdTestCli, "") + c.Assert(err, IsNil) + c.Assert(wToB, HasLen, len(expectBounds)) + + sToB := make(map[string]ha.SourceBound, len(wToB)) + for _, b := range wToB { + sToB[b.Source] = b + } + for _, source := range expectBounds { + c.Assert(sToB[source], NotNil) + c.Assert(s.GetWorkerBySource(source), NotNil) + c.Assert(s.GetWorkerBySource(source).Stage(), Equals, WorkerBound) + c.Assert(sToB[source], DeepEquals, s.GetWorkerBySource(source).Bound()) + } + + for _, source := range expectUnbounds { + c.Assert(s.GetWorkerBySource(source), IsNil) + } +} + +func (t *testScheduler) relayStageMatch(c *C, s *Scheduler, source string, expectStage pb.Stage) { + stage := ha.NewRelayStage(expectStage, source) + c.Assert(s.GetExpectRelayStage(source), DeepEquals, stage) + + eStage, _, err := ha.GetRelayStage(etcdTestCli, source) + c.Assert(err, IsNil) + switch expectStage { + case pb.Stage_Running, pb.Stage_Paused: + c.Assert(eStage, DeepEquals, stage) + default: + c.Assert(eStage, DeepEquals, stageEmpty) + } +} + +func (t *testScheduler) subTaskStageMatch(c *C, s *Scheduler, task, source string, expectStage pb.Stage) { + stage := ha.NewSubTaskStage(expectStage, source, task) + c.Assert(s.GetExpectSubTaskStage(task, source), DeepEquals, stage) + + eStageM, _, err := ha.GetSubTaskStage(etcdTestCli, source, task) + c.Assert(err, IsNil) + switch expectStage { + case pb.Stage_Running, pb.Stage_Paused: + c.Assert(eStageM, HasLen, 1) + c.Assert(eStageM[task], DeepEquals, stage) + default: + c.Assert(eStageM, HasLen, 0) + } +} diff --git a/dm/master/scheduler/worker.go b/dm/master/scheduler/worker.go new file mode 100644 index 0000000000..89619fee91 --- /dev/null +++ b/dm/master/scheduler/worker.go @@ -0,0 +1,136 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package scheduler + +import ( + "context" + "sync" + "time" + + "github.com/pingcap/dm/dm/master/workerrpc" + "github.com/pingcap/dm/pkg/ha" + "github.com/pingcap/dm/pkg/terror" +) + +// WorkerStage represents the stage of a DM-worker instance. +type WorkerStage string + +// the stage of DM-worker instances. +// valid transformation: +// - Offline -> Free, receive keep-alive. +// - Free -> Offline, lost keep-alive. +// - Free -> Bound, schedule source. +// - Bound -> Offline, lost keep-live, when receive keep-alive again, it should become Free. +// - Bound -> Free, revoke source scheduler. +// invalid transformation: +// - Offline -> WorkerBound, must become Free first. +const ( + WorkerOffline WorkerStage = "offline" // the worker is not online yet. + WorkerFree WorkerStage = "free" // the worker is online, but no upstream source assigned to it yet. + WorkerBound WorkerStage = "bound" // the worker is online, and one upstream source already assigned to it. +) + +var ( + nullBound ha.SourceBound +) + +// Worker is an agent for a DM-worker instance. +type Worker struct { + mu sync.RWMutex + + cli workerrpc.Client // the gRPC client proxy. + + baseInfo ha.WorkerInfo // the base information of the DM-worker instance. + bound ha.SourceBound // the source bound relationship, null value if not bounded. + stage WorkerStage // the current stage. +} + +// NewWorker creates a new Worker instance with Offline stage. +func NewWorker(baseInfo ha.WorkerInfo) (*Worker, error) { + cli, err := workerrpc.NewGRPCClient(baseInfo.Addr) + if err != nil { + return nil, err + } + + w := &Worker{ + cli: cli, + baseInfo: baseInfo, + stage: WorkerOffline, + } + return w, nil +} + +// Close closes the worker and release resources. +func (w *Worker) Close() { + w.mu.Lock() + defer w.mu.Unlock() + w.cli.Close() +} + +// ToOffline transforms to Offline. +// both Free and Bound can transform to Offline. +func (w *Worker) ToOffline() { + w.mu.Lock() + defer w.mu.Unlock() + w.stage = WorkerOffline + w.bound = nullBound +} + +// ToFree transforms to Free. +// both Offline and Bound can transform to Free. +func (w *Worker) ToFree() { + w.mu.Lock() + defer w.mu.Unlock() + w.stage = WorkerFree + w.bound = nullBound +} + +// ToBound transforms to Bound. +// Free can transform to Bound, but Offline can't. +func (w *Worker) ToBound(bound ha.SourceBound) error { + w.mu.Lock() + defer w.mu.Unlock() + if w.stage == WorkerOffline { + return terror.ErrSchedulerWorkerInvalidTrans.Generate(w.BaseInfo(), WorkerOffline, WorkerBound) + } + w.stage = WorkerBound + w.bound = bound + return nil +} + +// BaseInfo returns the base info of the worker. +// No lock needed because baseInfo should not be modified after the instance created. +func (w *Worker) BaseInfo() ha.WorkerInfo { + return w.baseInfo +} + +// Stage returns the current stage. +func (w *Worker) Stage() WorkerStage { + w.mu.RLock() + defer w.mu.RUnlock() + return w.stage +} + +// Bound returns the current source ID bounded to, +// returns null value if not bounded. +func (w *Worker) Bound() ha.SourceBound { + w.mu.RLock() + defer w.mu.RUnlock() + return w.bound +} + +// SendRequest sends request to the DM-worker instance. +func (w *Worker) SendRequest(ctx context.Context, req *workerrpc.Request, d time.Duration) (*workerrpc.Response, error) { + return w.cli.SendRequest(ctx, req, d) +} diff --git a/dm/master/scheduler/worker_test.go b/dm/master/scheduler/worker_test.go new file mode 100644 index 0000000000..ba8e67cc8d --- /dev/null +++ b/dm/master/scheduler/worker_test.go @@ -0,0 +1,95 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package scheduler + +import ( + "context" + "time" + + . "github.com/pingcap/check" + + "github.com/pingcap/dm/dm/master/workerrpc" + "github.com/pingcap/dm/dm/pb" + "github.com/pingcap/dm/pkg/ha" + "github.com/pingcap/dm/pkg/terror" +) + +type testWorker struct { +} + +var _ = Suite(&testWorker{}) + +func (t *testWorker) TestWorker(c *C) { + var ( + name = "dm-worker-1" + info = ha.NewWorkerInfo(name, "127.0.0.1:8262") + bound = ha.NewSourceBound("mysql-replica-1", name) + ) + + // create a worker with Offline stage and not bound. + w, err := NewWorker(info) + c.Assert(err, IsNil) + defer w.Close() + c.Assert(w.BaseInfo(), DeepEquals, info) + c.Assert(w.Stage(), Equals, WorkerOffline) + c.Assert(w.Bound(), DeepEquals, nullBound) + + // Offline to Free. + w.ToFree() + c.Assert(w.Stage(), Equals, WorkerFree) + c.Assert(w.Bound(), DeepEquals, nullBound) + + // Free to Bound. + c.Assert(w.ToBound(bound), IsNil) + c.Assert(w.Stage(), Equals, WorkerBound) + c.Assert(w.Bound(), DeepEquals, bound) + + // Bound to Free. + w.ToFree() + c.Assert(w.Stage(), Equals, WorkerFree) + c.Assert(w.Bound(), DeepEquals, nullBound) + + // Free to Offline. + w.ToOffline() + c.Assert(w.Stage(), Equals, WorkerOffline) + c.Assert(w.Bound(), DeepEquals, nullBound) + + // Offline to Bound, invalid. + c.Assert(terror.ErrSchedulerWorkerInvalidTrans.Equal(w.ToBound(bound)), IsTrue) + c.Assert(w.Stage(), Equals, WorkerOffline) + c.Assert(w.Bound(), DeepEquals, nullBound) + + // Offline to Free to Bound again. + w.ToFree() + c.Assert(w.ToBound(bound), IsNil) + c.Assert(w.Stage(), Equals, WorkerBound) + c.Assert(w.Bound(), DeepEquals, bound) + + // Bound to Offline. + w.ToOffline() + c.Assert(w.Stage(), Equals, WorkerOffline) + c.Assert(w.Bound(), DeepEquals, nullBound) + + // SendRequest. + req := &workerrpc.Request{ + Type: workerrpc.CmdOperateSubTask, + OperateSubTask: &pb.OperateSubTaskRequest{ + Op: pb.TaskOp_Pause, + Name: "task1", + }, + } + resp, err := w.SendRequest(context.Background(), req, time.Second) + c.Assert(err, ErrorMatches, ".*connection refused.*") + c.Assert(resp, IsNil) +} diff --git a/dm/master/server.go b/dm/master/server.go index fb68c63bc8..dcf7214512 100644 --- a/dm/master/server.go +++ b/dm/master/server.go @@ -20,7 +20,8 @@ import ( "sort" "strings" "sync" - "time" + + "github.com/pingcap/dm/pkg/conn" "github.com/pingcap/errors" "github.com/siddontang/go/sync2" @@ -30,14 +31,12 @@ import ( "google.golang.org/grpc" "github.com/pingcap/dm/checker" - "github.com/pingcap/dm/dm/common" "github.com/pingcap/dm/dm/config" - "github.com/pingcap/dm/dm/master/coordinator" + "github.com/pingcap/dm/dm/master/scheduler" "github.com/pingcap/dm/dm/master/shardddl" operator "github.com/pingcap/dm/dm/master/sql-operator" "github.com/pingcap/dm/dm/master/workerrpc" "github.com/pingcap/dm/dm/pb" - "github.com/pingcap/dm/pkg/conn" "github.com/pingcap/dm/pkg/election" "github.com/pingcap/dm/pkg/etcdutil" "github.com/pingcap/dm/pkg/log" @@ -54,11 +53,6 @@ const ( electionKey = "/dm-master/leader" ) -var ( - fetchDDLInfoRetryTimeout = 5 * time.Second - etcdTimeouit = 10 * time.Second -) - // Server handles RPC requests for dm-master type Server struct { sync.RWMutex @@ -79,11 +73,7 @@ type Server struct { bgFunWg sync.WaitGroup // dm-worker-ID(host:ip) -> dm-worker client management - coordinator *coordinator.Coordinator - workerClients map[string]workerrpc.Client - - // task-name -> source-list - taskSources map[string][]string + scheduler *scheduler.Scheduler // shard DDL pessimist pessimist *shardddl.Pessimist @@ -102,16 +92,14 @@ type Server struct { // NewServer creates a new Server func NewServer(cfg *Config) *Server { + logger := log.L() server := Server{ cfg: cfg, - coordinator: coordinator.NewCoordinator(), - workerClients: make(map[string]workerrpc.Client), - taskSources: make(map[string][]string), + scheduler: scheduler.NewScheduler(&logger), sqlOperatorHolder: operator.NewHolder(), idGen: tracing.NewIDGen(), ap: NewAgentPool(&RateLimitConfig{rate: cfg.RPCRateLimit, burst: cfg.RPCRateBurst}), } - logger := log.L() server.pessimist = shardddl.NewPessimist(&logger, server.getTaskResources) server.closed.Set(true) @@ -214,31 +202,6 @@ func (s *Server) Start(ctx context.Context) (err error) { return } -func (s *Server) recoverSubTask() error { - ectx, cancel := context.WithTimeout(s.etcdClient.Ctx(), etcdTimeouit) - defer cancel() - resp, err := s.etcdClient.Get(ectx, common.UpstreamSubTaskKeyAdapter.Path(), clientv3.WithPrefix()) - if err != nil { - return err - } - for _, kv := range resp.Kvs { - infos, err := common.UpstreamSubTaskKeyAdapter.Decode(string(kv.Key)) - if err != nil { - return terror.Annotate(err, "decode upstream subtask key from etcd failed") - } - sourceID := infos[0] - taskName := infos[1] - if sources, ok := s.taskSources[taskName]; ok { - s.taskSources[taskName] = append(sources, sourceID) - } else { - srcs := make([]string, 1) - srcs = append(srcs, sourceID) - s.taskSources[taskName] = srcs - } - } - return nil -} - // Close close the RPC server, this function can be called multiple times func (s *Server) Close() { if s.closed.Get() { @@ -282,6 +245,7 @@ func errorCommonWorkerResponse(msg string, source string) *pb.CommonWorkerRespon // key: /dm-worker/r/address // value: name func (s *Server) RegisterWorker(ctx context.Context, req *pb.RegisterWorkerRequest) (*pb.RegisterWorkerResponse, error) { + log.L().Info("", zap.Stringer("payload", req), zap.String("request", "RegisterWorker")) isLeader, needForward := s.isLeaderAndNeedForward() if !isLeader { if needForward { @@ -290,62 +254,24 @@ func (s *Server) RegisterWorker(ctx context.Context, req *pb.RegisterWorkerReque return nil, terror.ErrMasterRequestIsNotForwardToLeader } - if !s.coordinator.IsStarted() { - respWorker := &pb.RegisterWorkerResponse{ - Result: false, - Msg: "coordinator not started, may not leader", - } - return respWorker, nil - } - k := common.WorkerRegisterKeyAdapter.Encode(req.Address) - v := req.Name - ectx, cancel := context.WithTimeout(ctx, etcdTimeouit) - defer cancel() - resp, err := s.etcdClient.Txn(ectx). - If(clientv3.Compare(clientv3.CreateRevision(k), "=", 0)). - Then(clientv3.OpPut(k, v)). - Else(clientv3.OpGet(k)). - Commit() + err := s.scheduler.AddWorker(req.Name, req.Address) if err != nil { - return nil, err - } - if !resp.Succeeded { - if len(resp.Responses) == 0 { - return nil, errors.Errorf("the response kv is invalid length, request key: %s", k) - } - kv := resp.Responses[0].GetResponseRange().GetKvs()[0] - kvs, err := common.WorkerRegisterKeyAdapter.Decode(string(kv.Key)) - if err != nil { - log.L().Error("decode worker register key from etcd failed", zap.Error(err)) - return &pb.RegisterWorkerResponse{ - Result: false, - Msg: errors.ErrorStack(err), - }, nil - } - address := kvs[0] - name := string(kv.Value) - if name != req.Name { - msg := fmt.Sprintf("the address %s already registered with name %s", address, name) - respSource := &pb.RegisterWorkerResponse{ - Result: false, - Msg: msg, - } - log.L().Error(msg) - return respSource, nil - } + return &pb.RegisterWorkerResponse{ + Result: false, + Msg: errors.ErrorStack(err), + }, nil } - s.coordinator.AddWorker(req.Name, req.Address, nil) log.L().Info("register worker successfully", zap.String("name", req.Name), zap.String("address", req.Address)) - respWorker := &pb.RegisterWorkerResponse{ + return &pb.RegisterWorkerResponse{ Result: true, - } - return respWorker, nil + }, nil } // OfflineWorker removes info of the worker which has been Closed, and all the worker are store in the path: // key: /dm-worker/r/address // value: name func (s *Server) OfflineWorker(ctx context.Context, req *pb.OfflineWorkerRequest) (*pb.OfflineWorkerResponse, error) { + log.L().Info("", zap.Stringer("payload", req), zap.String("request", "OfflineWorker")) isLeader, needForward := s.isLeaderAndNeedForward() if !isLeader { if needForward { @@ -354,45 +280,25 @@ func (s *Server) OfflineWorker(ctx context.Context, req *pb.OfflineWorkerRequest return nil, terror.ErrMasterRequestIsNotForwardToLeader } - if !s.coordinator.IsStarted() { - respWorker := &pb.OfflineWorkerResponse{ - Result: false, - Msg: "coordinator not started, may not leader", - } - return respWorker, nil - } - w := s.coordinator.GetWorkerByAddress(req.Address) - if w == nil || w.State() != coordinator.WorkerClosed { - respWorker := &pb.OfflineWorkerResponse{ - Result: false, - Msg: "worker which has not been closed is not allowed to offline", - } - return respWorker, nil - } - k := common.WorkerRegisterKeyAdapter.Encode(req.Address) - v := req.Name - ectx, cancel := context.WithTimeout(ctx, etcdTimeouit) - defer cancel() - resp, err := s.etcdClient.Txn(ectx). - If(clientv3.Compare(clientv3.Value(k), "=", v)). - Then(clientv3.OpDelete(k)). - Commit() + err := s.scheduler.RemoveWorker(req.Name) if err != nil { - return nil, err - } - if !resp.Succeeded { - respWorker := &pb.OfflineWorkerResponse{ + return &pb.OfflineWorkerResponse{ Result: false, - Msg: "delete from etcd failed, please check whether the name and address of worker match.", - } - return respWorker, nil + Msg: errors.ErrorStack(err), + }, nil } - s.coordinator.RemoveWorker(req.Name) log.L().Info("offline worker successfully", zap.String("name", req.Name), zap.String("address", req.Address)) - respWorker := &pb.OfflineWorkerResponse{ + return &pb.OfflineWorkerResponse{ Result: true, + }, nil +} + +func subtaskCfgPointersToInstances(stCfgPointers ...*config.SubTaskConfig) []config.SubTaskConfig { + stCfgs := make([]config.SubTaskConfig, 0, len(stCfgPointers)) + for _, stCfg := range stCfgPointers { + stCfgs = append(stCfgs, *stCfg) } - return respWorker, nil + return stCfgs } // StartTask implements MasterServer.StartTask @@ -417,8 +323,6 @@ func (s *Server) StartTask(ctx context.Context, req *pb.StartTaskRequest) (*pb.S log.L().Info("", zap.String("task name", cfg.Name), zap.Stringer("task", cfg), zap.String("request", "StartTask")) sourceRespCh := make(chan *pb.CommonWorkerResponse, len(stCfgs)) - var wg sync.WaitGroup - subSourceIDs := make([]string, 0, len(stCfgs)) if len(req.Sources) > 0 { // specify only start task on partial sources sourceCfg := make(map[string]*config.SubTaskConfig) @@ -435,64 +339,37 @@ func (s *Server) StartTask(ctx context.Context, req *pb.StartTaskRequest) (*pb.S } } - for _, stCfg := range stCfgs { - subSourceIDs = append(subSourceIDs, stCfg.SourceID) - wg.Add(1) - go s.ap.Emit(ctx, 0, func(args ...interface{}) { - defer wg.Done() - cfg, _ := args[0].(*config.SubTaskConfig) - worker, stCfgToml, _, err := s.taskConfigArgsExtractor(cfg) - if err != nil { - sourceRespCh <- errorCommonWorkerResponse(err.Error(), cfg.SourceID) - return - } - request := &workerrpc.Request{ - Type: workerrpc.CmdStartSubTask, - StartSubTask: &pb.StartSubTaskRequest{Task: stCfgToml}, - } - resp, err := worker.SendRequest(ctx, request, s.cfg.RPCTimeout) - if err != nil { - resp = &workerrpc.Response{ - Type: workerrpc.CmdStartSubTask, - StartSubTask: errorCommonWorkerResponse(err.Error(), cfg.SourceID), - } - } - resp.StartSubTask.Source = cfg.SourceID - sourceRespCh <- resp.StartSubTask - }, func(args ...interface{}) { - defer wg.Done() - cfg, _ := args[0].(*config.SubTaskConfig) - worker, _, _, err := s.taskConfigArgsExtractor(cfg) - if err != nil { - sourceRespCh <- errorCommonWorkerResponse(err.Error(), cfg.SourceID) - return - } - sourceRespCh <- errorCommonWorkerResponse(terror.ErrMasterNoEmitToken.Generate(worker.Address()).Error(), cfg.SourceID) - }, stCfg) - } - wg.Wait() - - sourceRespMap := make(map[string]*pb.CommonWorkerResponse, len(stCfgs)) - sources := make([]string, 0, len(stCfgs)) - for len(sourceRespCh) > 0 { - sourceResp := <-sourceRespCh - sourceRespMap[sourceResp.Source] = sourceResp - sources = append(sources, sourceResp.Source) + // there are invalid sourceCfgs + if len(sourceRespCh) > 0 { + sourceRespMap := make(map[string]*pb.CommonWorkerResponse, len(stCfgs)) + sources := make([]string, 0, len(stCfgs)) + for len(sourceRespCh) > 0 { + sourceResp := <-sourceRespCh + sourceRespMap[sourceResp.Source] = sourceResp + sources = append(sources, sourceResp.Source) + } + // TODO: simplify logic of response sort + sort.Strings(sources) + sourceResps := make([]*pb.CommonWorkerResponse, 0, len(sources)) + for _, source := range sources { + sourceResps = append(sourceResps, sourceRespMap[source]) + } + return &pb.StartTaskResponse{ + Result: false, + Sources: sourceResps, + }, nil } - // TODO: simplify logic of response sort - sort.Strings(sources) - sourceResps := make([]*pb.CommonWorkerResponse, 0, len(sources)) - for _, worker := range sources { - sourceResps = append(sourceResps, sourceRespMap[worker]) + err = s.scheduler.AddSubTasks(subtaskCfgPointersToInstances(stCfgs...)...) + if err != nil { + return &pb.StartTaskResponse{ + Result: false, + Msg: errors.ErrorStack(err), + }, nil } - // record task -> sources map - s.taskSources[cfg.Name] = subSourceIDs - return &pb.StartTaskResponse{ - Result: true, - Sources: sourceResps, + Result: true, }, nil } @@ -521,83 +398,36 @@ func (s *Server) OperateTask(ctx context.Context, req *pb.OperateTaskRequest) (* resp.Msg = fmt.Sprintf("task %s has no source or not exist, please check the task name and status", req.Name) return resp, nil } - workerRespCh := make(chan *pb.OperateSubTaskResponse, len(sources)) - - handleErr := func(err error, source string) { - log.L().Error("response error", zap.Error(err)) - workerResp := &pb.OperateSubTaskResponse{ - Op: req.Op, - Result: false, - Source: source, - Msg: err.Error(), + var expect pb.Stage + switch req.Op { + case pb.TaskOp_Pause: + expect = pb.Stage_Paused + case pb.TaskOp_Resume: + expect = pb.Stage_Running + case pb.TaskOp_Stop: + err := s.scheduler.RemoveSubTasks(req.Name, sources...) + if err != nil { + resp.Msg = errors.ErrorStack(err) + return resp, nil } - workerRespCh <- workerResp - } - - subReq := &workerrpc.Request{ - Type: workerrpc.CmdOperateSubTask, - OperateSubTask: &pb.OperateSubTaskRequest{ - Op: req.Op, - Name: req.Name, - }, - } - - var wg sync.WaitGroup - for _, source := range sources { - wg.Add(1) - go s.ap.Emit(ctx, 0, func(args ...interface{}) { - defer wg.Done() - sourceID, _ := args[0].(string) - worker1 := s.coordinator.GetWorkerBySourceID(sourceID) - if worker1 == nil { - err := terror.ErrMasterWorkerArgsExtractor.Generatef("%s relevant worker-client not found", sourceID) - handleErr(err, sourceID) - return - } - resp, err := worker1.SendRequest(ctx, subReq, s.cfg.RPCTimeout) - if err != nil { - resp = &workerrpc.Response{ - Type: workerrpc.CmdOperateSubTask, - OperateSubTask: &pb.OperateSubTaskResponse{ - Op: req.Op, - Result: false, - Msg: err.Error(), - }, - } - } - resp.OperateSubTask.Source = sourceID - workerRespCh <- resp.OperateSubTask - }, func(args ...interface{}) { - defer wg.Done() - sourceID, _ := args[0].(string) - handleErr(terror.ErrMasterNoEmitToken.Generate(sourceID), sourceID) - }, source) - } - wg.Wait() - - workerRespMap := make(map[string]*pb.OperateSubTaskResponse, len(sources)) - for len(workerRespCh) > 0 { - workerResp := <-workerRespCh - workerRespMap[workerResp.Source] = workerResp - } - - workerResps := make([]*pb.OperateSubTaskResponse, 0, len(sources)) - for _, source := range sources { - workerResps = append(workerResps, workerRespMap[source]) + resp.Result = true + return resp, nil + default: + resp.Msg = terror.ErrMasterInvalidOperateTaskOp.Generate(req.Op.String()).Error() + return resp, nil } - - if req.Op == pb.TaskOp_Stop { - // remove (partial / all) workers for a task - s.removeTaskWorkers(req.Name, sources) + err := s.scheduler.UpdateExpectSubTaskStage(expect, req.Name, sources...) + if err != nil { + resp.Msg = errors.ErrorStack(err) + return resp, nil } resp.Result = true - resp.Sources = workerResps - return resp, nil } // UpdateTask implements MasterServer.UpdateTask +// TODO: support update task later func (s *Server) UpdateTask(ctx context.Context, req *pb.UpdateTaskRequest) (*pb.UpdateTaskResponse, error) { log.L().Info("", zap.Stringer("payload", req), zap.String("request", "UpdateTask")) @@ -637,42 +467,42 @@ func (s *Server) UpdateTask(ctx context.Context, req *pb.UpdateTaskRequest) (*pb } } - var wg sync.WaitGroup - for _, stCfg := range stCfgs { - wg.Add(1) - go s.ap.Emit(ctx, 0, func(args ...interface{}) { - defer wg.Done() - cfg, _ := args[0].(*config.SubTaskConfig) - worker, stCfgToml, _, err := s.taskConfigArgsExtractor(cfg) - if err != nil { - workerRespCh <- errorCommonWorkerResponse(err.Error(), cfg.SourceID) - return - } - request := &workerrpc.Request{ - Type: workerrpc.CmdUpdateSubTask, - UpdateSubTask: &pb.UpdateSubTaskRequest{Task: stCfgToml}, - } - resp, err := worker.SendRequest(ctx, request, s.cfg.RPCTimeout) - if err != nil { - resp = &workerrpc.Response{ - Type: workerrpc.CmdUpdateSubTask, - UpdateSubTask: errorCommonWorkerResponse(err.Error(), cfg.SourceID), - } - } - resp.UpdateSubTask.Source = cfg.SourceID - workerRespCh <- resp.UpdateSubTask - }, func(args ...interface{}) { - defer wg.Done() - cfg, _ := args[0].(*config.SubTaskConfig) - worker, _, _, err := s.taskConfigArgsExtractor(cfg) - if err != nil { - workerRespCh <- errorCommonWorkerResponse(err.Error(), cfg.SourceID) - return - } - workerRespCh <- errorCommonWorkerResponse(terror.ErrMasterNoEmitToken.Generate(worker.Address()).Error(), cfg.SourceID) - }, stCfg) - } - wg.Wait() + //var wg sync.WaitGroup + //for _, stCfg := range stCfgs { + // wg.Add(1) + // go s.ap.Emit(ctx, 0, func(args ...interface{}) { + // defer wg.Done() + // cfg, _ := args[0].(*config.SubTaskConfig) + // worker, stCfgToml, _, err := s.taskConfigArgsExtractor(cfg) + // if err != nil { + // workerRespCh <- errorCommonWorkerResponse(err.Error(), cfg.SourceID) + // return + // } + // request := &workerrpc.Request{ + // Type: workerrpc.CmdUpdateSubTask, + // UpdateSubTask: &pb.UpdateSubTaskRequest{Task: stCfgToml}, + // } + // resp, err := worker.SendRequest(ctx, request, s.cfg.RPCTimeout) + // if err != nil { + // resp = &workerrpc.Response{ + // Type: workerrpc.CmdUpdateSubTask, + // UpdateSubTask: errorCommonWorkerResponse(err.Error(), cfg.SourceID), + // } + // } + // resp.UpdateSubTask.Source = cfg.SourceID + // workerRespCh <- resp.UpdateSubTask + // }, func(args ...interface{}) { + // defer wg.Done() + // cfg, _ := args[0].(*config.SubTaskConfig) + // worker, _, _, err := s.taskConfigArgsExtractor(cfg) + // if err != nil { + // workerRespCh <- errorCommonWorkerResponse(err.Error(), cfg.SourceID) + // return + // } + // workerRespCh <- errorCommonWorkerResponse(terror.ErrMasterNoEmitToken.Generate(worker.Address()).Error(), cfg.SourceID) + // }, stCfg) + //} + //wg.Wait() workerRespMap := make(map[string]*pb.CommonWorkerResponse, len(stCfgs)) workers := make([]string, 0, len(stCfgs)) @@ -699,42 +529,39 @@ type hasWokers interface { GetName() string } -func extractWorkers(s *Server, req hasWokers) ([]string, error) { - workers := make([]string, 0, len(s.coordinator.GetAllWorkers())) +func extractSources(s *Server, req hasWokers) ([]string, error) { + var sources []string if len(req.GetSources()) > 0 { // query specified dm-workers invalidWorkers := make([]string, 0, len(req.GetSources())) for _, source := range req.GetSources() { - w := s.coordinator.GetWorkerBySourceID(source) - if w == nil || w.State() == coordinator.WorkerClosed { + w := s.scheduler.GetWorkerBySource(source) + if w == nil || w.Stage() == scheduler.WorkerOffline { invalidWorkers = append(invalidWorkers, source) } } if len(invalidWorkers) > 0 { return nil, errors.Errorf("%s relevant worker-client not found", strings.Join(invalidWorkers, ", ")) } - workers = req.GetSources() + sources = req.GetSources() } else if len(req.GetName()) > 0 { - // query specified task's workers - workers = s.getTaskResources(req.GetName()) - if len(workers) == 0 { - return nil, errors.Errorf("task %s has no workers or not exist, can try `refresh-worker-tasks` cmd first", req.GetName()) + // query specified task's sources + sources = s.getTaskResources(req.GetName()) + if len(sources) == 0 { + return nil, errors.Errorf("task %s has no source or not exist, can try `refresh-worker-tasks` cmd first", req.GetName()) } } else { - // query all workers - log.L().Info("get workers") - for source := range s.coordinator.GetRunningMysqlSource() { - workers = append(workers, source) - } + // query all sources + log.L().Info("get sources") + sources = s.scheduler.BoundSources() } - return workers, nil + return sources, nil } // QueryStatus implements MasterServer.QueryStatus func (s *Server) QueryStatus(ctx context.Context, req *pb.QueryStatusListRequest) (*pb.QueryStatusListResponse, error) { log.L().Info("", zap.Stringer("payload", req), zap.String("request", "QueryStatus")) - isLeader, needForward := s.isLeaderAndNeedForward() if !isLeader { if needForward { @@ -743,7 +570,7 @@ func (s *Server) QueryStatus(ctx context.Context, req *pb.QueryStatusListRequest return nil, terror.ErrMasterRequestIsNotForwardToLeader } - sources, err := extractWorkers(s, req) + sources, err := extractSources(s, req) if err != nil { return &pb.QueryStatusListResponse{ Result: false, @@ -773,7 +600,6 @@ func (s *Server) QueryStatus(ctx context.Context, req *pb.QueryStatusListRequest // QueryError implements MasterServer.QueryError func (s *Server) QueryError(ctx context.Context, req *pb.QueryErrorListRequest) (*pb.QueryErrorListResponse, error) { log.L().Info("", zap.Stringer("payload", req), zap.String("request", "QueryError")) - isLeader, needForward := s.isLeaderAndNeedForward() if !isLeader { if needForward { @@ -782,7 +608,7 @@ func (s *Server) QueryError(ctx context.Context, req *pb.QueryErrorListRequest) return nil, terror.ErrMasterRequestIsNotForwardToLeader } - sources, err := extractWorkers(s, req) + sources, err := extractSources(s, req) if err != nil { return &pb.QueryErrorListResponse{ Result: false, @@ -935,9 +761,9 @@ func (s *Server) HandleSQLs(ctx context.Context, req *pb.HandleSQLsRequest) (*pb SqlPattern: req.SqlPattern, }, } - worker := s.coordinator.GetWorkerBySourceID(req.Source) + worker := s.scheduler.GetWorkerBySource(req.Source) if worker == nil { - resp.Msg = fmt.Sprintf("worker %s client not found in %v", req.Source, s.coordinator.GetAllWorkers()) + resp.Msg = fmt.Sprintf("source %s not found in bound sources %v", req.Source, s.scheduler.BoundSources()) return resp, nil } response, err := worker.SendRequest(ctx, subReq, s.cfg.RPCTimeout) @@ -980,7 +806,7 @@ func (s *Server) PurgeWorkerRelay(ctx context.Context, req *pb.PurgeWorkerRelayR wg.Add(1) go func(source string) { defer wg.Done() - worker := s.coordinator.GetWorkerBySourceID(source) + worker := s.scheduler.GetWorkerBySource(source) if worker == nil { workerRespCh <- errorCommonWorkerResponse(fmt.Sprintf("worker %s relevant worker-client not found", source), source) return @@ -1042,7 +868,7 @@ func (s *Server) SwitchWorkerRelayMaster(ctx context.Context, req *pb.SwitchWork go s.ap.Emit(ctx, 0, func(args ...interface{}) { defer wg.Done() sourceID, _ := args[0].(string) - worker := s.coordinator.GetWorkerBySourceID(sourceID) + worker := s.scheduler.GetWorkerBySource(sourceID) if worker == nil { err := terror.ErrMasterWorkerArgsExtractor.Generatef("%s relevant worker-client not found", sourceID) handleErr(err, sourceID) @@ -1090,7 +916,6 @@ func (s *Server) SwitchWorkerRelayMaster(ctx context.Context, req *pb.SwitchWork // OperateWorkerRelayTask implements MasterServer.OperateWorkerRelayTask func (s *Server) OperateWorkerRelayTask(ctx context.Context, req *pb.OperateWorkerRelayRequest) (*pb.OperateWorkerRelayResponse, error) { log.L().Info("", zap.Stringer("payload", req), zap.String("request", "OperateWorkerRelayTask")) - isLeader, needForward := s.isLeaderAndNeedForward() if !isLeader { if needForward { @@ -1099,111 +924,39 @@ func (s *Server) OperateWorkerRelayTask(ctx context.Context, req *pb.OperateWork return nil, terror.ErrMasterRequestIsNotForwardToLeader } - request := &workerrpc.Request{ - Type: workerrpc.CmdOperateRelay, - OperateRelay: &pb.OperateRelayRequest{Op: req.Op}, - } - workerRespCh := make(chan *pb.OperateRelayResponse, len(req.Sources)) - var wg sync.WaitGroup - for _, source := range req.Sources { - wg.Add(1) - go func(source string) { - defer wg.Done() - worker := s.coordinator.GetWorkerBySourceID(source) - if worker == nil { - workerResp := &pb.OperateRelayResponse{ - Op: req.Op, - Result: false, - Source: source, - Msg: fmt.Sprintf("%s relevant worker-client not found", source), - } - workerRespCh <- workerResp - return - } - resp, err := worker.SendRequest(ctx, request, s.cfg.RPCTimeout) - workerResp := &pb.OperateRelayResponse{} - if err != nil { - workerResp = &pb.OperateRelayResponse{ - Result: false, - Msg: errors.ErrorStack(err), - } - } else { - workerResp = resp.OperateRelay - } - workerResp.Op = req.Op - workerResp.Source = source - workerRespCh <- workerResp - }(source) - } - wg.Wait() - - workerRespMap := make(map[string]*pb.OperateRelayResponse, len(req.Sources)) - for len(workerRespCh) > 0 { - workerResp := <-workerRespCh - workerRespMap[workerResp.Source] = workerResp - } - - sort.Strings(req.Sources) - workerResps := make([]*pb.OperateRelayResponse, 0, len(req.Sources)) - for _, worker := range req.Sources { - workerResps = append(workerResps, workerRespMap[worker]) - } - - return &pb.OperateWorkerRelayResponse{ - Result: true, - Sources: workerResps, - }, nil -} - -// replaceTaskWorkers replaces the whole task-workers mapper -func (s *Server) replaceTaskWorkers(taskWorkers map[string][]string) { - for task := range taskWorkers { - sort.Strings(taskWorkers[task]) - } - s.Lock() - defer s.Unlock() - s.taskSources = taskWorkers -} - -// removeTaskWorkers remove (partial / all) workers for a task -func (s *Server) removeTaskWorkers(task string, workers []string) { - toRemove := make(map[string]struct{}) - for _, w := range workers { - toRemove[w] = struct{}{} - } - - s.Lock() - defer s.Unlock() - if _, ok := s.taskSources[task]; !ok { - log.L().Warn("not found workers", zap.String("task", task)) - return + resp := &pb.OperateWorkerRelayResponse{ + Op: req.Op, + Result: false, } - remain := make([]string, 0, len(s.taskSources[task])) - for _, worker := range s.taskSources[task] { - if _, ok := toRemove[worker]; !ok { - remain = append(remain, worker) - } + var expect pb.Stage + switch req.Op { + case pb.RelayOp_ResumeRelay: + expect = pb.Stage_Running + case pb.RelayOp_PauseRelay: + expect = pb.Stage_Paused + default: + resp.Msg = "request relay Op is not supported by OperateWorkerRelay, please check it again" + return resp, nil } - if len(remain) == 0 { - delete(s.taskSources, task) - log.L().Info("remove task from taskWorker", zap.String("task", task)) - } else { - s.taskSources[task] = remain - log.L().Info("update workers of task", zap.String("task", task), zap.Strings("reamin workers", remain)) + err := s.scheduler.UpdateExpectRelayStage(expect, req.Sources...) + if err != nil { + resp.Msg = errors.ErrorStack(err) + return resp, nil } + resp.Result = true + return resp, nil } // getTaskResources gets workers relevant to specified task func (s *Server) getTaskResources(task string) []string { s.Lock() defer s.Unlock() - workers, ok := s.taskSources[task] - if !ok { - return []string{} - } + cfgM := s.scheduler.GetSubTaskCfgsByTask(task) // do a copy - ret := make([]string, 0, len(workers)) - ret = append(ret, workers...) + ret := make([]string, 0, len(cfgM)) + for source := range cfgM { + ret = append(ret, source) + } return ret } @@ -1242,7 +995,7 @@ func (s *Server) getStatusFromWorkers(ctx context.Context, sources []string, tas go s.ap.Emit(ctx, 0, func(args ...interface{}) { defer wg.Done() sourceID, _ := args[0].(string) - worker := s.coordinator.GetWorkerBySourceID(sourceID) + worker := s.scheduler.GetWorkerBySource(sourceID) if worker == nil { err := terror.ErrMasterWorkerArgsExtractor.Generatef("%s relevant worker-client not found", sourceID) handleErr(err, sourceID) @@ -1295,7 +1048,7 @@ func (s *Server) getErrorFromWorkers(ctx context.Context, sources []string, task go s.ap.Emit(ctx, 0, func(args ...interface{}) { defer wg.Done() sourceID, _ := args[0].(string) - worker := s.coordinator.GetWorkerBySourceID(sourceID) + worker := s.scheduler.GetWorkerBySource(sourceID) if worker == nil { err := terror.ErrMasterWorkerArgsExtractor.Generatef("%s relevant worker-client not found", sourceID) handleErr(err, sourceID) @@ -1396,7 +1149,7 @@ func (s *Server) UpdateWorkerRelayConfig(ctx context.Context, req *pb.UpdateWork source := req.Source content := req.Config - worker := s.coordinator.GetWorkerBySourceID(source) + worker := s.scheduler.GetWorkerBySource(source) if worker == nil { return errorCommonWorkerResponse(fmt.Sprintf("source %s relevant source-client not found", source), source), nil } @@ -1414,16 +1167,16 @@ func (s *Server) UpdateWorkerRelayConfig(ctx context.Context, req *pb.UpdateWork } // TODO: refine the call stack of this API, query worker configs that we needed only -func (s *Server) getWorkerConfigs(ctx context.Context, workers []*config.MySQLInstance) (map[string]config.DBConfig, error) { +func (s *Server) getSourceConfigs(sources []*config.MySQLInstance) (map[string]config.DBConfig, error) { cfgs := make(map[string]config.DBConfig) - for _, w := range workers { - if cfg := s.coordinator.GetConfigBySourceID(w.SourceID); cfg != nil { + for _, source := range sources { + if cfg := s.scheduler.GetSourceCfgByID(source.SourceID); cfg != nil { // check the password _, err := cfg.DecryptPassword() if err != nil { return nil, err } - cfgs[w.SourceID] = cfg.From + cfgs[source.SourceID] = cfg.From } } return cfgs, nil @@ -1444,7 +1197,7 @@ func (s *Server) MigrateWorkerRelay(ctx context.Context, req *pb.MigrateWorkerRe source := req.Source binlogPos := req.BinlogPos binlogName := req.BinlogName - worker := s.coordinator.GetWorkerBySourceID(source) + worker := s.scheduler.GetWorkerBySource(source) if worker == nil { return errorCommonWorkerResponse(fmt.Sprintf("source %s relevant source-client not found", source), source), nil } @@ -1486,100 +1239,70 @@ func (s *Server) CheckTask(ctx context.Context, req *pb.CheckTaskRequest) (*pb.C }, nil } -func makeMysqlWorkerResponse(err error) (*pb.MysqlWorkerResponse, error) { - return &pb.MysqlWorkerResponse{ - Result: false, - Msg: errors.ErrorStack(err), - }, nil +func parseAndAdjustSourceConfig(c *config.SourceConfig, content string) error { + if err := c.Parse(content); err != nil { + return err + } + dbConfig, err := c.GenerateDBConfig() + if err != nil { + return err + } + fromDB, err := conn.DefaultDBProvider.Apply(*dbConfig) + if err != nil { + return err + } + if err = c.Adjust(fromDB.DB); err != nil { + return err + } + if _, err = c.Toml(); err != nil { + return err + } + return nil } -// OperateMysqlWorker will create or update a Worker -func (s *Server) OperateMysqlWorker(ctx context.Context, req *pb.MysqlWorkerRequest) (*pb.MysqlWorkerResponse, error) { +// OperateSource will create or update an upstream source. +func (s *Server) OperateSource(ctx context.Context, req *pb.OperateSourceRequest) (*pb.OperateSourceResponse, error) { + log.L().Info("", zap.Stringer("payload", req), zap.String("request", "OperateSource")) isLeader, needForward := s.isLeaderAndNeedForward() if !isLeader { if needForward { - return s.leaderClient.OperateMysqlWorker(ctx, req) + return s.leaderClient.OperateSource(ctx, req) } return nil, terror.ErrMasterRequestIsNotForwardToLeader } - cfg := config.NewMysqlConfig() - if err := cfg.Parse(req.Config); err != nil { - return makeMysqlWorkerResponse(err) - } - - dbConfig, err := cfg.GenerateDBConfig() - if err != nil { - return makeMysqlWorkerResponse(err) + cfg := config.NewSourceConfig() + err := parseAndAdjustSourceConfig(cfg, req.Config) + resp := &pb.OperateSourceResponse{ + Result: false, } - fromDB, err := conn.DefaultDBProvider.Apply(*dbConfig) if err != nil { - return makeMysqlWorkerResponse(err) - } - if err = cfg.Adjust(fromDB.DB); err != nil { - return makeMysqlWorkerResponse(err) - } - if req.Config, err = cfg.Toml(); err != nil { - return makeMysqlWorkerResponse(err) + resp.Msg = errors.ErrorStack(err) + return resp, nil } - var resp *pb.MysqlWorkerResponse switch req.Op { - case pb.WorkerOp_StartWorker: - w := s.coordinator.GetWorkerBySourceID(cfg.SourceID) - if w != nil { - return &pb.MysqlWorkerResponse{ - Result: false, - Msg: "Create worker failed. worker has been started", - }, nil - } - w, err = s.coordinator.AcquireWorkerForSource(cfg.SourceID) + case pb.SourceOp_StartSource: + err := s.scheduler.AddSourceCfg(*cfg) if err != nil { - return makeMysqlWorkerResponse(err) + resp.Msg = errors.ErrorStack(err) + return resp, nil } - - resp, err = w.OperateMysqlWorker(ctx, req, s.cfg.RPCTimeout) + case pb.SourceOp_UpdateSource: + // TODO: support SourceOp_UpdateSource later + resp.Msg = "Update worker config is not supported by dm-ha now" + return resp, nil + case pb.SourceOp_StopSource: + err := s.scheduler.RemoveSourceCfg(cfg.SourceID) if err != nil { - // TODO: handle error or backoff - s.coordinator.HandleStartedWorker(w, cfg, false) - return makeMysqlWorkerResponse(err) - } - // TODO: handle error or backoff - s.coordinator.HandleStartedWorker(w, cfg, true) - case pb.WorkerOp_UpdateConfig: - w := s.coordinator.GetWorkerBySourceID(cfg.SourceID) - if w == nil { - return &pb.MysqlWorkerResponse{ - Result: false, - Msg: "Update worker config failed. worker has not been started", - }, nil - } - if resp, err = w.OperateMysqlWorker(ctx, req, s.cfg.RPCTimeout); err != nil { - return makeMysqlWorkerResponse(err) - } - case pb.WorkerOp_StopWorker: - w := s.coordinator.GetWorkerBySourceID(cfg.SourceID) - if w == nil { - return &pb.MysqlWorkerResponse{ - Result: false, - Msg: "Stop Mysql-worker failed. worker has not been started", - }, nil - } - if resp, err = w.OperateMysqlWorker(ctx, req, s.cfg.RPCTimeout); err != nil { - return &pb.MysqlWorkerResponse{ - Result: false, - Msg: errors.ErrorStack(err), - }, nil - } - if resp.Result { - s.coordinator.HandleStoppedWorker(w, cfg) + resp.Msg = errors.ErrorStack(err) + return resp, nil } default: - return &pb.MysqlWorkerResponse{ - Result: false, - Msg: "invalid operate on worker", - }, nil + resp.Msg = "invalid operate on worker" + return resp, nil } + resp.Result = true return resp, nil } @@ -1590,7 +1313,7 @@ func (s *Server) generateSubTask(ctx context.Context, task string) (*config.Task return nil, nil, terror.WithClass(err, terror.ClassDMMaster) } - sourceCfgs, err := s.getWorkerConfigs(ctx, cfg.MySQLInstances) + sourceCfgs, err := s.getSourceConfigs(cfg.MySQLInstances) if err != nil { return nil, nil, err } @@ -1614,13 +1337,13 @@ var ( // taskConfigArgsExtractor extracts SubTaskConfig from args and returns its relevant // grpc client, worker id (host:port), subtask config in toml, task name and error -func (s *Server) taskConfigArgsExtractor(cfg *config.SubTaskConfig) (*coordinator.Worker, string, string, error) { +func (s *Server) taskConfigArgsExtractor(cfg *config.SubTaskConfig) (*scheduler.Worker, string, string, error) { handleErr := func(err error) error { log.L().Error("response", zap.Error(err)) return err } - worker := s.coordinator.GetWorkerBySourceID(cfg.SourceID) + worker := s.scheduler.GetWorkerBySource(cfg.SourceID) if worker == nil { return nil, "", "", handleErr(terror.ErrMasterTaskConfigExtractor.Generatef("%s relevant worker-client not found", cfg.SourceID)) } @@ -1635,9 +1358,9 @@ func (s *Server) taskConfigArgsExtractor(cfg *config.SubTaskConfig) (*coordinato // workerArgsExtractor extracts worker from args and returns its relevant // grpc client, worker id (host:port) and error -func (s *Server) workerArgsExtractor(source string) (*coordinator.Worker, error) { +func (s *Server) workerArgsExtractor(source string) (*scheduler.Worker, error) { log.L().Info("Debug get worker", zap.String("source-id", source)) - cli := s.coordinator.GetWorkerBySourceID(source) + cli := s.scheduler.GetWorkerBySource(source) if cli == nil { return nil, terror.ErrMasterWorkerArgsExtractor.Generatef("%s relevant worker-client not found", source) } diff --git a/dm/master/server_test.go b/dm/master/server_test.go index 502cac3bdb..6a82336f81 100644 --- a/dm/master/server_test.go +++ b/dm/master/server_test.go @@ -20,6 +20,7 @@ import ( "io/ioutil" "net/http" "strings" + "sync" "testing" "time" @@ -27,15 +28,17 @@ import ( "github.com/pingcap/check" "github.com/pingcap/errors" "github.com/pingcap/pd/pkg/tempurl" + "go.etcd.io/etcd/clientv3" "go.etcd.io/etcd/integration" "github.com/pingcap/dm/checker" "github.com/pingcap/dm/dm/config" - "github.com/pingcap/dm/dm/master/coordinator" + "github.com/pingcap/dm/dm/master/scheduler" "github.com/pingcap/dm/dm/master/workerrpc" "github.com/pingcap/dm/dm/pb" "github.com/pingcap/dm/dm/pbmock" "github.com/pingcap/dm/pkg/etcdutil" + "github.com/pingcap/dm/pkg/ha" "github.com/pingcap/dm/pkg/log" "github.com/pingcap/dm/pkg/terror" "github.com/pingcap/dm/pkg/utils" @@ -132,22 +135,25 @@ syncers: var ( errGRPCFailed = "test grpc request failed" errGRPCFailedReg = fmt.Sprintf("(?m).*%s.*", errGRPCFailed) - errExecDDLFailed = "dm-worker exec ddl failed" - msgNoSubTask = "no sub task started" - msgNoSubTaskReg = fmt.Sprintf(".*%s", msgNoSubTask) errCheckSyncConfig = "(?m).*check sync config with error.*" errCheckSyncConfigReg = fmt.Sprintf("(?m).*%s.*", errCheckSyncConfig) testEtcdCluster *integration.ClusterV3 + keepAliveTTL = int64(1) + etcdTestCli *clientv3.Client ) func TestMaster(t *testing.T) { + log.InitLogger(&log.Config{}) testEtcdCluster = integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) defer testEtcdCluster.Terminate(t) + etcdTestCli = testEtcdCluster.RandClient() + check.TestingT(t) } type testMaster struct { + workerClients map[string]workerrpc.Client } var _ = check.Suite(&testMaster{}) @@ -155,6 +161,8 @@ var _ = check.Suite(&testMaster{}) func (t *testMaster) SetUpSuite(c *check.C) { err := log.InitLogger(&log.Config{}) c.Assert(err, check.IsNil) + t.workerClients = make(map[string]workerrpc.Client) + clearEtcdEnv(c) } func newMockRPCClient(client pb.WorkerClient) workerrpc.Client { @@ -172,6 +180,24 @@ func extractWorkerSource(deployMapper []*DeployMapper) ([]string, []string) { return sources, workers } +func clearEtcdEnv(c *check.C) { + c.Assert(ha.ClearTestInfoOperation(etcdTestCli), check.IsNil) +} + +func clearSchedulerEnv(c *check.C, cancel context.CancelFunc, wg *sync.WaitGroup) { + cancel() + wg.Wait() + clearEtcdEnv(c) +} + +func makeNilWorkerClients(workers []string) map[string]workerrpc.Client { + nilWorkerClients := make(map[string]workerrpc.Client, len(workers)) + for _, worker := range workers { + nilWorkerClients[worker] = nil + } + return nilWorkerClients +} + func testDefaultMasterServer(c *check.C) *Server { cfg := NewConfig() err := cfg.Parse([]string{"-config=./dm-master.toml"}) @@ -184,75 +210,33 @@ func testDefaultMasterServer(c *check.C) *Server { return server } -func testGenSubTaskConfig(c *check.C, server *Server, ctrl *gomock.Controller) map[string]*config.SubTaskConfig { - sources, workers := extractWorkerSource(server.cfg.Deploy) - nilWorkerClients := make(map[string]workerrpc.Client) - for _, worker := range workers { - nilWorkerClients[worker] = nil - } - server.coordinator = testMockCoordinator(c, sources, workers, "", nilWorkerClients) - - workerCfg := make(map[string]*config.SubTaskConfig) - _, stCfgs, err := server.generateSubTask(context.Background(), taskConfig) - c.Assert(err, check.IsNil) - for _, stCfg := range stCfgs { - worker, ok := server.cfg.DeployMap[stCfg.SourceID] - c.Assert(ok, check.IsTrue) - workerCfg[worker] = stCfg - } - return workerCfg -} - -func testMockCoordinator(c *check.C, sources, workers []string, password string, workerClients map[string]workerrpc.Client) *coordinator.Coordinator { - coordinator2 := coordinator.NewCoordinator() - err := coordinator2.Start(context.Background(), testEtcdCluster.RandClient()) +func testMockScheduler(ctx context.Context, wg *sync.WaitGroup, c *check.C, sources, workers []string, password string, workerClients map[string]workerrpc.Client) (*scheduler.Scheduler, []context.CancelFunc) { + logger := log.L() + scheduler2 := scheduler.NewScheduler(&logger) + err := scheduler2.Start(ctx, etcdTestCli) c.Assert(err, check.IsNil) + cancels := make([]context.CancelFunc, 0, 2) for i := range workers { - // add worker to coordinator's workers map - coordinator2.AddWorker("worker"+string(i), workers[i], workerClients[workers[i]]) - // set this worker's status to workerFree - coordinator2.AddWorker("worker"+string(i), workers[i], nil) + // add worker to scheduler's workers map + name := workers[i] + c.Assert(scheduler2.AddWorker(name, workers[i]), check.IsNil) + scheduler2.SetWorkerClientForTest(name, workerClients[workers[i]]) // operate mysql config on this worker - cfg := &config.MysqlConfig{SourceID: sources[i], From: config.DBConfig{Password: password}} - w, err := coordinator2.AcquireWorkerForSource(cfg.SourceID) - c.Assert(err, check.IsNil) - coordinator2.HandleStartedWorker(w, cfg, true) - } - return coordinator2 -} - -func testMockStartTask(c *check.C, server *Server, ctrl *gomock.Controller, workerCfg map[string]*config.SubTaskConfig, rpcSuccess bool) { - for _, deploy := range server.cfg.Deploy { - mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) - - stCfg, ok := workerCfg[deploy.Worker] - c.Assert(ok, check.IsTrue) - stCfgToml, err := stCfg.Toml() - c.Assert(err, check.IsNil) - - // mock start sub task - rets := make([]interface{}, 0, 2) - if rpcSuccess { - rets = []interface{}{ - &pb.CommonWorkerResponse{ - Result: true, - Source: deploy.Source, - }, - nil, - } - } else { - rets = []interface{}{ - nil, - errors.New(errGRPCFailed), - } - } - mockWorkerClient.EXPECT().StartSubTask( - gomock.Any(), - &pb.StartSubTaskRequest{Task: stCfgToml}, - ).Return(rets...) - - server.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) - } + cfg := &config.SourceConfig{SourceID: sources[i], From: config.DBConfig{Password: password}} + c.Assert(scheduler2.AddSourceCfg(*cfg), check.IsNil) + wg.Add(1) + ctx1, cancel1 := context.WithCancel(ctx) + cancels = append(cancels, cancel1) + go func(ctx context.Context, workerName string) { + defer wg.Done() + c.Assert(ha.KeepAlive(ctx, etcdTestCli, workerName, keepAliveTTL), check.IsNil) + }(ctx1, name) + c.Assert(utils.WaitSomething(30, 10*time.Millisecond, func() bool { + w := scheduler2.GetWorkerBySource(sources[i]) + return w != nil && w.BaseInfo().Name == name + }), check.IsTrue) + } + return scheduler2, cancels } func (t *testMaster) TestQueryStatus(c *check.C) { @@ -269,22 +253,27 @@ func (t *testMaster) TestQueryStatus(c *check.C) { gomock.Any(), &pb.QueryStatusRequest{}, ).Return(&pb.QueryStatusResponse{Result: true}, nil) - server.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) + t.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) } - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) + var wg sync.WaitGroup + ctx, cancel := context.WithCancel(context.Background()) + server.scheduler, _ = testMockScheduler(ctx, &wg, c, sources, workers, "", t.workerClients) resp, err := server.QueryStatus(context.Background(), &pb.QueryStatusListRequest{}) c.Assert(err, check.IsNil) c.Assert(resp.Result, check.IsTrue) + clearSchedulerEnv(c, cancel, &wg) + // query specified sources for _, deploy := range server.cfg.Deploy { mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) mockWorkerClient.EXPECT().QueryStatus( gomock.Any(), &pb.QueryStatusRequest{}, ).Return(&pb.QueryStatusResponse{Result: true}, nil) - server.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) + t.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) } - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) + ctx, cancel = context.WithCancel(context.Background()) + server.scheduler, _ = testMockScheduler(ctx, &wg, c, sources, workers, "", t.workerClients) resp, err = server.QueryStatus(context.Background(), &pb.QueryStatusListRequest{ Sources: sources, }) @@ -305,8 +294,8 @@ func (t *testMaster) TestQueryStatus(c *check.C) { }) c.Assert(err, check.IsNil) c.Assert(resp.Result, check.IsFalse) - c.Assert(resp.Msg, check.Matches, "task .* has no workers or not exist, can try `refresh-worker-tasks` cmd first") - + c.Assert(resp.Msg, check.Matches, "task .* has no source or not exist, can try `refresh-worker-tasks` cmd first") + clearSchedulerEnv(c, cancel, &wg) // TODO: test query with correct task name, this needs to add task first } @@ -317,7 +306,10 @@ func (t *testMaster) TestCheckTask(c *check.C) { server := testDefaultMasterServer(c) sources, workers := extractWorkerSource(server.cfg.Deploy) - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) + t.workerClients = makeNilWorkerClients(workers) + var wg sync.WaitGroup + ctx, cancel := context.WithCancel(context.Background()) + server.scheduler, _ = testMockScheduler(ctx, &wg, c, sources, workers, "", t.workerClients) resp, err := server.CheckTask(context.Background(), &pb.CheckTaskRequest{ Task: taskConfig, }) @@ -330,14 +322,17 @@ func (t *testMaster) TestCheckTask(c *check.C) { }) c.Assert(err, check.IsNil) c.Assert(resp.Result, check.IsFalse) + clearSchedulerEnv(c, cancel, &wg) - // simulate invalid password returned from coordinator, so cfg.SubTaskConfigs will fail - server.coordinator = testMockCoordinator(c, sources, workers, "invalid-encrypt-password", server.workerClients) + // simulate invalid password returned from scheduler, so cfg.SubTaskConfigs will fail + ctx, cancel = context.WithCancel(context.Background()) + server.scheduler, _ = testMockScheduler(ctx, &wg, c, sources, workers, "invalid-encrypt-password", t.workerClients) resp, err = server.CheckTask(context.Background(), &pb.CheckTaskRequest{ Task: taskConfig, }) c.Assert(err, check.IsNil) c.Assert(resp.Result, check.IsFalse) + clearSchedulerEnv(c, cancel, &wg) } func (t *testMaster) TestStartTask(c *check.C) { @@ -353,17 +348,27 @@ func (t *testMaster) TestStartTask(c *check.C) { }) c.Assert(err, check.IsNil) c.Assert(resp.Result, check.IsFalse) - workerCfg := testGenSubTaskConfig(c, server, ctrl) // test start task successfully - testMockStartTask(c, server, ctrl, workerCfg, true) - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) + var wg sync.WaitGroup + ctx, cancel := context.WithCancel(context.Background()) + server.scheduler, _ = testMockScheduler(ctx, &wg, c, sources, workers, "", makeNilWorkerClients(workers)) resp, err = server.StartTask(context.Background(), &pb.StartTaskRequest{ Task: taskConfig, Sources: sources, }) c.Assert(err, check.IsNil) c.Assert(resp.Result, check.IsTrue) + // taskName is relative to taskConfig + taskName := "test" + for _, source := range sources { + t.subTaskStageMatch(c, server.scheduler, taskName, source, pb.Stage_Running) + tcm, _, err2 := ha.GetSubTaskCfg(etcdTestCli, source, taskName, 0) + c.Assert(err2, check.IsNil) + c.Assert(tcm, check.HasKey, taskName) + c.Assert(tcm[taskName].Name, check.Equals, taskName) + c.Assert(tcm[taskName].SourceID, check.Equals, source) + } // check start-task with an invalid source invalidSource := "invalid-source" @@ -372,25 +377,11 @@ func (t *testMaster) TestStartTask(c *check.C) { Sources: []string{invalidSource}, }) c.Assert(err, check.IsNil) - c.Assert(resp.Result, check.IsTrue) + c.Assert(resp.Result, check.IsFalse) c.Assert(resp.Sources, check.HasLen, 1) c.Assert(resp.Sources[0].Result, check.IsFalse) c.Assert(resp.Sources[0].Source, check.Equals, invalidSource) - // test start sub task request to worker returns error - testMockStartTask(c, server, ctrl, workerCfg, false) - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) - resp, err = server.StartTask(context.Background(), &pb.StartTaskRequest{ - Task: taskConfig, - }) - c.Assert(err, check.IsNil) - c.Assert(resp.Result, check.IsTrue) - c.Assert(resp.Sources, check.HasLen, 2) - for _, workerResp := range resp.Sources { - c.Assert(workerResp.Result, check.IsFalse) - c.Assert(workerResp.Msg, check.Matches, errGRPCFailedReg) - } - // test start task, but the first step check-task fails bakCheckSyncConfigFunc := checker.CheckSyncConfigFunc checker.CheckSyncConfigFunc = func(_ context.Context, _ []*config.SubTaskConfig) error { @@ -406,6 +397,7 @@ func (t *testMaster) TestStartTask(c *check.C) { c.Assert(err, check.IsNil) c.Assert(resp.Result, check.IsFalse) c.Assert(resp.Msg, check.Matches, errCheckSyncConfigReg) + clearSchedulerEnv(c, cancel, &wg) } func (t *testMaster) TestQueryError(c *check.C) { @@ -421,12 +413,15 @@ func (t *testMaster) TestQueryError(c *check.C) { gomock.Any(), &pb.QueryErrorRequest{}, ).Return(&pb.QueryErrorResponse{Result: true}, nil) - server.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) + t.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) } - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) + var wg sync.WaitGroup + ctx, cancel := context.WithCancel(context.Background()) + server.scheduler, _ = testMockScheduler(ctx, &wg, c, sources, workers, "", t.workerClients) resp, err := server.QueryError(context.Background(), &pb.QueryErrorListRequest{}) c.Assert(err, check.IsNil) c.Assert(resp.Result, check.IsTrue) + clearSchedulerEnv(c, cancel, &wg) // query specified dm-worker[s] for _, deploy := range server.cfg.Deploy { @@ -435,9 +430,11 @@ func (t *testMaster) TestQueryError(c *check.C) { gomock.Any(), &pb.QueryErrorRequest{}, ).Return(&pb.QueryErrorResponse{Result: true}, nil) - server.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) + t.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) } - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) + + ctx, cancel = context.WithCancel(context.Background()) + server.scheduler, _ = testMockScheduler(ctx, &wg, c, sources, workers, "", t.workerClients) resp, err = server.QueryError(context.Background(), &pb.QueryErrorListRequest{ Sources: sources, }) @@ -458,8 +455,8 @@ func (t *testMaster) TestQueryError(c *check.C) { }) c.Assert(err, check.IsNil) c.Assert(resp.Result, check.IsFalse) - c.Assert(resp.Msg, check.Matches, "task .* has no workers or not exist, can try `refresh-worker-tasks` cmd first") - + c.Assert(resp.Msg, check.Matches, "task .* has no source or not exist, can try `refresh-worker-tasks` cmd first") + clearSchedulerEnv(c, cancel, &wg) // TODO: test query with correct task name, this needs to add task first } @@ -483,93 +480,42 @@ func (t *testMaster) TestOperateTask(c *check.C) { c.Assert(resp.Result, check.IsFalse) c.Assert(resp.Msg, check.Equals, fmt.Sprintf("task %s has no source or not exist, please check the task name and status", taskName)) - // test operate-task while worker clients not found - server.taskSources[taskName] = sources - resp, err = server.OperateTask(context.Background(), &pb.OperateTaskRequest{ - Op: pauseOp, - Name: taskName, + // 1. start task + taskName = "test" + nilWorkerClients := makeNilWorkerClients(workers) + var wg sync.WaitGroup + ctx, cancel := context.WithCancel(context.Background()) + server.scheduler, _ = testMockScheduler(ctx, &wg, c, sources, workers, "", nilWorkerClients) + stResp, err := server.StartTask(context.Background(), &pb.StartTaskRequest{ + Task: taskConfig, + Sources: sources, }) c.Assert(err, check.IsNil) - c.Assert(resp.Result, check.IsTrue) - c.Assert(resp.Sources, check.HasLen, 2) - for _, subtaskResp := range resp.Sources { - c.Assert(subtaskResp.Op, check.Equals, pauseOp) - c.Assert(subtaskResp.Msg, check.Matches, ".* relevant worker-client not found") - } - - // test pause task successfully - for _, deploy := range server.cfg.Deploy { - mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) - mockWorkerClient.EXPECT().OperateSubTask( - gomock.Any(), - &pb.OperateSubTaskRequest{ - Op: pauseOp, - Name: taskName, - }, - ).Return(&pb.OperateSubTaskResponse{ - Op: pauseOp, - Result: true, - }, nil) - - server.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) + c.Assert(stResp.Result, check.IsTrue) + for _, source := range sources { + t.subTaskStageMatch(c, server.scheduler, taskName, source, pb.Stage_Running) } - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) + // 2. pause task resp, err = server.OperateTask(context.Background(), &pb.OperateTaskRequest{ Op: pauseOp, Name: taskName, }) c.Assert(err, check.IsNil) c.Assert(resp.Result, check.IsTrue) - c.Assert(resp.Op, check.Equals, pauseOp) - c.Assert(resp.Sources, check.HasLen, 2) - for _, subtaskResp := range resp.Sources { - c.Assert(subtaskResp.Op, check.Equals, pauseOp) - c.Assert(subtaskResp.Result, check.IsTrue) + for _, source := range sources { + t.subTaskStageMatch(c, server.scheduler, taskName, source, pb.Stage_Paused) } - - // test operate sub task to worker returns error - server.taskSources[taskName] = sources - for _, deploy := range server.cfg.Deploy { - mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) - mockWorkerClient.EXPECT().OperateSubTask( - gomock.Any(), - &pb.OperateSubTaskRequest{ - Op: pb.TaskOp_Pause, - Name: taskName, - }, - ).Return(nil, errors.New(errGRPCFailed)) - server.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) - } - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) + // 3. resume task resp, err = server.OperateTask(context.Background(), &pb.OperateTaskRequest{ - Op: pb.TaskOp_Pause, - Name: taskName, - Sources: sources, + Op: pb.TaskOp_Resume, + Name: taskName, }) c.Assert(err, check.IsNil) c.Assert(resp.Result, check.IsTrue) - c.Assert(resp.Sources, check.HasLen, 2) - for _, subtaskResp := range resp.Sources { - c.Assert(subtaskResp.Op, check.Equals, pauseOp) - c.Assert(subtaskResp.Msg, check.Matches, errGRPCFailedReg) + for _, source := range sources { + t.subTaskStageMatch(c, server.scheduler, taskName, source, pb.Stage_Running) } - - // test stop task successfully, remove partial workers - server.taskSources[taskName] = sources - mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) - mockWorkerClient.EXPECT().OperateSubTask( - gomock.Any(), - &pb.OperateSubTaskRequest{ - Op: pb.TaskOp_Stop, - Name: taskName, - }, - ).Return(&pb.OperateSubTaskResponse{ - Op: pb.TaskOp_Stop, - Result: true, - }, nil) - - server.workerClients[workers[0]] = newMockRPCClient(mockWorkerClient) - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) + // 4. test stop task successfully, remove partial sources resp, err = server.OperateTask(context.Background(), &pb.OperateTaskRequest{ Op: pb.TaskOp_Stop, Name: taskName, @@ -577,128 +523,16 @@ func (t *testMaster) TestOperateTask(c *check.C) { }) c.Assert(err, check.IsNil) c.Assert(resp.Result, check.IsTrue) - c.Assert(resp.Sources, check.HasLen, 1) - c.Assert(resp.Sources[0].Result, check.IsTrue) - c.Assert(server.taskSources, check.HasKey, taskName) - c.Assert(server.taskSources[taskName], check.DeepEquals, []string{sources[1]}) - - // test stop task successfully, remove all workers - server.taskSources[taskName] = sources - for _, deploy := range server.cfg.Deploy { - mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) - mockWorkerClient.EXPECT().OperateSubTask( - gomock.Any(), - &pb.OperateSubTaskRequest{ - Op: pb.TaskOp_Stop, - Name: taskName, - }, - ).Return(&pb.OperateSubTaskResponse{ - Op: pb.TaskOp_Stop, - Result: true, - }, nil) - - server.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) - } - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) + c.Assert(server.getTaskResources(taskName), check.DeepEquals, []string{sources[1]}) + // 5. test stop task successfully, remove all workers resp, err = server.OperateTask(context.Background(), &pb.OperateTaskRequest{ Op: pb.TaskOp_Stop, Name: taskName, }) c.Assert(err, check.IsNil) c.Assert(resp.Result, check.IsTrue) - c.Assert(resp.Sources, check.HasLen, 2) - for _, subtaskResp := range resp.Sources { - c.Assert(subtaskResp.Op, check.Equals, pb.TaskOp_Stop) - c.Assert(subtaskResp.Result, check.IsTrue) - } - c.Assert(len(server.taskSources), check.Equals, 0) -} - -func (t *testMaster) TestUpdateTask(c *check.C) { - ctrl := gomock.NewController(c) - defer ctrl.Finish() - - server := testDefaultMasterServer(c) - sources, workers := extractWorkerSource(server.cfg.Deploy) - - // s.generateSubTask with error - resp, err := server.UpdateTask(context.Background(), &pb.UpdateTaskRequest{ - Task: "invalid toml config", - }) - c.Assert(err, check.IsNil) - c.Assert(resp.Result, check.IsFalse) - workerCfg := testGenSubTaskConfig(c, server, ctrl) - - mockUpdateTask := func(rpcSuccess bool) { - for _, deploy := range server.cfg.Deploy { - mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) - - stCfg, ok := workerCfg[deploy.Worker] - c.Assert(ok, check.IsTrue) - stCfgToml, err3 := stCfg.Toml() - c.Assert(err3, check.IsNil) - - // mock update sub task - rets := make([]interface{}, 0, 2) - if rpcSuccess { - rets = []interface{}{ - &pb.CommonWorkerResponse{ - Result: true, - Source: deploy.Source, - }, - nil, - } - } else { - rets = []interface{}{ - nil, - errors.New(errGRPCFailed), - } - } - mockWorkerClient.EXPECT().UpdateSubTask( - gomock.Any(), - &pb.UpdateSubTaskRequest{Task: stCfgToml}, - ).Return(rets...) - - server.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) - } - } - - // test update task successfully - mockUpdateTask(true) - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) - resp, err = server.UpdateTask(context.Background(), &pb.UpdateTaskRequest{ - Task: taskConfig, - Sources: sources, - }) - c.Assert(err, check.IsNil) - c.Assert(resp.Result, check.IsTrue) - - // check update-task with an invalid source - invalidSource := "invalid-source" - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) - resp, err = server.UpdateTask(context.Background(), &pb.UpdateTaskRequest{ - Task: taskConfig, - Sources: []string{invalidSource}, - }) - c.Assert(err, check.IsNil) - c.Assert(resp.Result, check.IsTrue) - c.Assert(resp.Sources, check.HasLen, 1) - c.Assert(resp.Sources[0].Result, check.IsFalse) - c.Assert(resp.Sources[0].Source, check.Equals, invalidSource) - - // test update sub task request to worker returns error - mockUpdateTask(false) - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) - resp, err = server.UpdateTask(context.Background(), &pb.UpdateTaskRequest{ - Task: taskConfig, - }) - c.Assert(err, check.IsNil) - c.Assert(resp.Result, check.IsTrue) - c.Assert(resp.Sources, check.HasLen, 2) - for _, workerResp := range resp.Sources { - c.Assert(workerResp.Result, check.IsFalse) - c.Assert(workerResp.Msg, check.Matches, errGRPCFailedReg) - } + c.Assert(len(server.getTaskResources(taskName)), check.Equals, 0) + clearSchedulerEnv(c, cancel, &wg) } func (t *testMaster) TestPurgeWorkerRelay(c *check.C) { @@ -738,7 +572,7 @@ func (t *testMaster) TestPurgeWorkerRelay(c *check.C) { Filename: filename, }, ).Return(rets...) - server.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) + t.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) } } @@ -756,9 +590,11 @@ func (t *testMaster) TestPurgeWorkerRelay(c *check.C) { c.Assert(w.Msg, check.Matches, ".*relevant worker-client not found") } + var wg sync.WaitGroup + ctx, cancel := context.WithCancel(context.Background()) // test PurgeWorkerRelay successfully mockPurgeRelay(true) - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) + server.scheduler, _ = testMockScheduler(ctx, &wg, c, sources, workers, "", t.workerClients) resp, err = server.PurgeWorkerRelay(context.Background(), &pb.PurgeWorkerRelayRequest{ Sources: sources, Time: now, @@ -770,10 +606,12 @@ func (t *testMaster) TestPurgeWorkerRelay(c *check.C) { for _, w := range resp.Sources { c.Assert(w.Result, check.IsTrue) } + clearSchedulerEnv(c, cancel, &wg) + ctx, cancel = context.WithCancel(context.Background()) // test PurgeWorkerRelay with error response mockPurgeRelay(false) - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) + server.scheduler, _ = testMockScheduler(ctx, &wg, c, sources, workers, "", t.workerClients) resp, err = server.PurgeWorkerRelay(context.Background(), &pb.PurgeWorkerRelayRequest{ Sources: sources, Time: now, @@ -786,6 +624,7 @@ func (t *testMaster) TestPurgeWorkerRelay(c *check.C) { c.Assert(w.Result, check.IsFalse) c.Assert(w.Msg, check.Matches, errGRPCFailedReg) } + clearSchedulerEnv(c, cancel, &wg) } func (t *testMaster) TestSwitchWorkerRelayMaster(c *check.C) { @@ -818,7 +657,7 @@ func (t *testMaster) TestSwitchWorkerRelayMaster(c *check.C) { gomock.Any(), &pb.SwitchRelayMasterRequest{}, ).Return(rets...) - server.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) + t.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) } } @@ -834,9 +673,11 @@ func (t *testMaster) TestSwitchWorkerRelayMaster(c *check.C) { c.Assert(w.Msg, check.Matches, "(?m).*relevant worker-client not found.*") } + var wg sync.WaitGroup + ctx, cancel := context.WithCancel(context.Background()) // test SwitchWorkerRelayMaster successfully mockSwitchRelayMaster(true) - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) + server.scheduler, _ = testMockScheduler(ctx, &wg, c, sources, workers, "", t.workerClients) resp, err = server.SwitchWorkerRelayMaster(context.Background(), &pb.SwitchWorkerRelayMasterRequest{ Sources: sources, }) @@ -846,10 +687,12 @@ func (t *testMaster) TestSwitchWorkerRelayMaster(c *check.C) { for _, w := range resp.Sources { c.Assert(w.Result, check.IsTrue) } + clearSchedulerEnv(c, cancel, &wg) + ctx, cancel = context.WithCancel(context.Background()) // test SwitchWorkerRelayMaster with error response mockSwitchRelayMaster(false) - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) + server.scheduler, _ = testMockScheduler(ctx, &wg, c, sources, workers, "", t.workerClients) resp, err = server.SwitchWorkerRelayMaster(context.Background(), &pb.SwitchWorkerRelayMasterRequest{ Sources: sources, }) @@ -860,6 +703,7 @@ func (t *testMaster) TestSwitchWorkerRelayMaster(c *check.C) { c.Assert(w.Result, check.IsFalse) c.Assert(w.Msg, check.Matches, errGRPCFailedReg) } + clearSchedulerEnv(c, cancel, &wg) } func (t *testMaster) TestOperateWorkerRelayTask(c *check.C) { @@ -868,34 +712,9 @@ func (t *testMaster) TestOperateWorkerRelayTask(c *check.C) { server := testDefaultMasterServer(c) sources, workers := extractWorkerSource(server.cfg.Deploy) - - // mock OperateRelay request - mockOperateRelay := func(rpcSuccess bool) { - for _, deploy := range server.cfg.Deploy { - rets := make([]interface{}, 0, 2) - if rpcSuccess { - rets = []interface{}{ - &pb.OperateRelayResponse{ - Result: true, - Source: deploy.Source, - Op: pb.RelayOp_PauseRelay, - }, - nil, - } - } else { - rets = []interface{}{ - nil, - errors.New(errGRPCFailed), - } - } - mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) - mockWorkerClient.EXPECT().OperateRelay( - gomock.Any(), - &pb.OperateRelayRequest{Op: pb.RelayOp_PauseRelay}, - ).Return(rets...) - server.workerClients[deploy.Worker] = newMockRPCClient(mockWorkerClient) - } - } + var wg sync.WaitGroup + ctx, cancel := context.WithCancel(context.Background()) + server.scheduler, _ = testMockScheduler(ctx, &wg, c, sources, workers, "", makeNilWorkerClients(workers)) // test OperateWorkerRelayTask with invalid dm-worker[s] resp, err := server.OperateWorkerRelayTask(context.Background(), &pb.OperateWorkerRelayRequest{ @@ -903,42 +722,30 @@ func (t *testMaster) TestOperateWorkerRelayTask(c *check.C) { Op: pb.RelayOp_PauseRelay, }) c.Assert(err, check.IsNil) - c.Assert(resp.Result, check.IsTrue) - c.Assert(resp.Sources, check.HasLen, 2) - for _, w := range resp.Sources { - c.Assert(w.Result, check.IsFalse) - c.Assert(w.Msg, check.Matches, ".*relevant worker-client not found") - } + c.Assert(resp.Result, check.IsFalse) + c.Assert(resp.Msg, check.Matches, `[\s\S]*need to update expectant relay stage not exist[\s\S]*`) - // test OperateWorkerRelayTask successfully - mockOperateRelay(true) - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) + // 1. test pause-relay successfully resp, err = server.OperateWorkerRelayTask(context.Background(), &pb.OperateWorkerRelayRequest{ Sources: sources, Op: pb.RelayOp_PauseRelay, }) c.Assert(err, check.IsNil) c.Assert(resp.Result, check.IsTrue) - c.Assert(resp.Sources, check.HasLen, 2) - for _, w := range resp.Sources { - c.Assert(w.Result, check.IsTrue) - c.Assert(w.Op, check.Equals, pb.RelayOp_PauseRelay) + for _, source := range sources { + t.relayStageMatch(c, server.scheduler, source, pb.Stage_Paused) } - - // test OperateWorkerRelayTask with error response - mockOperateRelay(false) - server.coordinator = testMockCoordinator(c, sources, workers, "", server.workerClients) + // 2. test resume-relay successfully resp, err = server.OperateWorkerRelayTask(context.Background(), &pb.OperateWorkerRelayRequest{ Sources: sources, - Op: pb.RelayOp_PauseRelay, + Op: pb.RelayOp_ResumeRelay, }) c.Assert(err, check.IsNil) c.Assert(resp.Result, check.IsTrue) - c.Assert(resp.Sources, check.HasLen, 2) - for _, w := range resp.Sources { - c.Assert(w.Result, check.IsFalse) - c.Assert(w.Msg, check.Matches, errGRPCFailedReg) + for _, source := range sources { + t.relayStageMatch(c, server.scheduler, source, pb.Stage_Running) } + clearSchedulerEnv(c, cancel, &wg) } func (t *testMaster) TestServer(c *check.C) { @@ -957,7 +764,7 @@ func (t *testMaster) TestServer(c *check.C) { t.testHTTPInterface(c, fmt.Sprintf("http://%s/status", cfg.MasterAddr), []byte(utils.GetRawInfo())) t.testHTTPInterface(c, fmt.Sprintf("http://%s/debug/pprof/", cfg.MasterAddr), []byte("Types of profiles available")) // HTTP API in this unit test is unstable, but we test it in `http_apis` in integration test. - //t.testHTTPInterface(c, fmt.Sprintf("http://%s/apis/v1alpha1/status/test-task", cfg.MasterAddr), []byte("task test-task has no workers or not exist")) + //t.testHTTPInterface(c, fmt.Sprintf("http://%s/apis/v1alpha1/status/test-task", cfg.MasterAddr), []byte("task test-task has no source or not exist")) dupServer := NewServer(cfg) err := dupServer.Start(ctx) @@ -1045,8 +852,9 @@ func (t *testMaster) TestJoinMember(c *check.C) { cancel() } -func (t *testMaster) TestOperateMysqlWorker(c *check.C) { - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) +func (t *testMaster) TestOperateSource(c *check.C) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() ctrl := gomock.NewController(c) defer ctrl.Finish() @@ -1065,71 +873,62 @@ func (t *testMaster) TestOperateMysqlWorker(c *check.C) { s1.leader = oneselfLeader c.Assert(s1.Start(ctx), check.IsNil) defer s1.Close() - mysqlCfg := config.NewMysqlConfig() - mysqlCfg.LoadFromFile("./dm-mysql.toml") + mysqlCfg := config.NewSourceConfig() + mysqlCfg.LoadFromFile("./source.toml") task, err := mysqlCfg.Toml() c.Assert(err, check.IsNil) - // wait for coordinator to start - c.Assert(utils.WaitSomething(6, 500*time.Millisecond, func() bool { - return s1.coordinator.IsStarted() - }), check.IsTrue) + sourceID := mysqlCfg.SourceID + // 1. wait for scheduler to start + time.Sleep(3 * time.Second) - req := &pb.MysqlWorkerRequest{Op: pb.WorkerOp_StartWorker, Config: task} - resp, err := s1.OperateMysqlWorker(ctx, req) - c.Assert(err, check.IsNil) - c.Assert(resp.Result, check.Equals, false) - c.Assert(resp.Msg, check.Matches, "[\\s\\S]*acquire worker failed. no free worker could start mysql task[\\s\\S]*") - mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) - req.Op = pb.WorkerOp_UpdateConfig - mockWorkerClient.EXPECT().OperateMysqlWorker( - gomock.Any(), - req, - ).Return(&pb.MysqlWorkerResponse{ - Result: true, - Msg: "", - }, nil) - req.Op = pb.WorkerOp_StopWorker - mockWorkerClient.EXPECT().OperateMysqlWorker( - gomock.Any(), - req, - ).Return(&pb.MysqlWorkerResponse{ - Result: true, - Msg: "", - }, nil) - req.Op = pb.WorkerOp_StartWorker - mockWorkerClient.EXPECT().OperateMysqlWorker( - gomock.Any(), - req, - ).Return(&pb.MysqlWorkerResponse{ - Result: true, - Msg: "", - }, nil) - s1.coordinator.AddWorker("", "localhost:10099", newMockRPCClient(mockWorkerClient)) - s1.coordinator.AddWorker("", "localhost:10099", nil) - resp, err = s1.OperateMysqlWorker(ctx, req) + // 2. try to add a new mysql source + req := &pb.OperateSourceRequest{Op: pb.SourceOp_StartSource, Config: task} + resp, err := s1.OperateSource(ctx, req) c.Assert(err, check.IsNil) + c.Log("aaa", resp) c.Assert(resp.Result, check.Equals, true) - resp, err = s1.OperateMysqlWorker(ctx, req) - c.Assert(err, check.IsNil) - c.Assert(resp.Result, check.Equals, false) - c.Assert(resp.Msg, check.Matches, ".*Create worker failed. worker has been started") - mysqlCfg.SourceID = "no-exist-source" + unBoundSources := s1.scheduler.UnboundSources() + c.Assert(unBoundSources, check.HasLen, 1) + c.Assert(unBoundSources[0], check.Equals, sourceID) + + // 3. try to stop a non-exist-source + req.Op = pb.SourceOp_StopSource + mysqlCfg.SourceID = "not-exist-source" task2, err := mysqlCfg.Toml() c.Assert(err, check.IsNil) req.Config = task2 - req.Op = pb.WorkerOp_UpdateConfig - resp, err = s1.OperateMysqlWorker(ctx, req) + resp, err = s1.OperateSource(ctx, req) c.Assert(err, check.IsNil) c.Assert(resp.Result, check.Equals, false) - c.Assert(resp.Msg, check.Matches, ".*Update worker config failed. worker has not been started") + c.Assert(resp.Msg, check.Matches, `[\s\S]*source config with ID `+mysqlCfg.SourceID+` not exists[\s\S]*`) + + // 4. start a new worker, the unbounded source should be bounded + var wg sync.WaitGroup + ctx1, cancel1 := context.WithCancel(ctx) + workerName := "worker1" + defer func() { + clearSchedulerEnv(c, cancel1, &wg) + }() + c.Assert(s1.scheduler.AddWorker(workerName, "172.16.10.72:8262"), check.IsNil) + wg.Add(1) + go func(ctx context.Context, workerName string) { + defer wg.Done() + c.Assert(ha.KeepAlive(ctx, s1.etcdClient, workerName, keepAliveTTL), check.IsNil) + }(ctx1, workerName) + c.Assert(utils.WaitSomething(30, 10*time.Millisecond, func() bool { + w := s1.scheduler.GetWorkerBySource(sourceID) + return w != nil && w.BaseInfo().Name == workerName + }), check.IsTrue) + + // 5. stop this source req.Config = task - resp, err = s1.OperateMysqlWorker(ctx, req) + req.Op = pb.SourceOp_StopSource + resp, err = s1.OperateSource(ctx, req) c.Assert(err, check.IsNil) c.Assert(resp.Result, check.Equals, true) - req.Op = pb.WorkerOp_StopWorker - resp, err = s1.OperateMysqlWorker(ctx, req) + emptyCfg, _, err := ha.GetSourceCfg(etcdTestCli, sourceID, 0) c.Assert(err, check.IsNil) - c.Assert(resp.Result, check.Equals, true) + c.Assert(emptyCfg, check.DeepEquals, config.SourceConfig{}) cancel() } @@ -1170,7 +969,7 @@ func (t *testMaster) TestOfflineWorker(c *check.C) { res, err := s1.OfflineWorker(ectx, req2) c.Assert(err, check.IsNil) c.Assert(res.Result, check.IsFalse) - c.Assert(res.Msg, check.Matches, ".*delete from etcd failed, please check whether the name and address of worker match.*") + c.Assert(res.Msg, check.Matches, `[\s\S]*dm-worker with name `+req2.Name+` not exists[\s\S]*`) } { req2.Name = "xixi" @@ -1179,3 +978,30 @@ func (t *testMaster) TestOfflineWorker(c *check.C) { c.Assert(res.Result, check.IsTrue) } } + +func (t *testMaster) relayStageMatch(c *check.C, s *scheduler.Scheduler, source string, expectStage pb.Stage) { + stage := ha.NewRelayStage(expectStage, source) + c.Assert(s.GetExpectRelayStage(source), check.DeepEquals, stage) + + eStage, _, err := ha.GetRelayStage(etcdTestCli, source) + c.Assert(err, check.IsNil) + switch expectStage { + case pb.Stage_Running, pb.Stage_Paused: + c.Assert(eStage, check.DeepEquals, stage) + } +} + +func (t *testMaster) subTaskStageMatch(c *check.C, s *scheduler.Scheduler, task, source string, expectStage pb.Stage) { + stage := ha.NewSubTaskStage(expectStage, source, task) + c.Assert(s.GetExpectSubTaskStage(task, source), check.DeepEquals, stage) + + eStageM, _, err := ha.GetSubTaskStage(etcdTestCli, source, task) + c.Assert(err, check.IsNil) + switch expectStage { + case pb.Stage_Running, pb.Stage_Paused: + c.Assert(eStageM, check.HasLen, 1) + c.Assert(eStageM[task], check.DeepEquals, stage) + default: + c.Assert(eStageM, check.HasLen, 0) + } +} diff --git a/dm/master/dm-mysql.toml b/dm/master/source.toml similarity index 100% rename from dm/master/dm-mysql.toml rename to dm/master/source.toml diff --git a/dm/master/workerrpc/interface.go b/dm/master/workerrpc/interface.go index c845de8c5a..450f3f676a 100644 --- a/dm/master/workerrpc/interface.go +++ b/dm/master/workerrpc/interface.go @@ -45,7 +45,6 @@ const ( CmdMigrateRelay CmdFetchDDLInfo - CmdOperateMysqlTask ) // Request wraps all dm-worker rpc requests. @@ -67,7 +66,6 @@ type Request struct { PurgeRelay *pb.PurgeRelayRequest UpdateRelay *pb.UpdateRelayRequest MigrateRelay *pb.MigrateRelayRequest - MysqlTask *pb.MysqlWorkerRequest } // Response wraps all dm-worker rpc responses. @@ -91,7 +89,6 @@ type Response struct { PurgeRelay *pb.CommonWorkerResponse UpdateRelay *pb.CommonWorkerResponse MigrateRelay *pb.CommonWorkerResponse - MysqlTask *pb.MysqlWorkerResponse } // Client is a client that sends RPC. diff --git a/dm/master/workerrpc/rawgrpc.go b/dm/master/workerrpc/rawgrpc.go index 4255900491..5b60c1d992 100644 --- a/dm/master/workerrpc/rawgrpc.go +++ b/dm/master/workerrpc/rawgrpc.go @@ -119,8 +119,6 @@ func callRPC(ctx context.Context, client pb.WorkerClient, req *Request) (*Respon resp.UpdateRelay, err = client.UpdateRelayConfig(ctx, req.UpdateRelay) case CmdMigrateRelay: resp.MigrateRelay, err = client.MigrateRelay(ctx, req.MigrateRelay) - case CmdOperateMysqlTask: - resp.MysqlTask, err = client.OperateMysqlWorker(ctx, req.MysqlTask) default: return nil, terror.ErrMasterGRPCInvalidReqType.Generate(req.Type) } diff --git a/dm/pb/dmmaster.pb.go b/dm/pb/dmmaster.pb.go index 7501ef03e8..570571d3be 100644 --- a/dm/pb/dmmaster.pb.go +++ b/dm/pb/dmmaster.pb.go @@ -28,6 +28,37 @@ var _ = math.Inf // proto package needs to be updated. const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package +type SourceOp int32 + +const ( + SourceOp_InvalidSourceOp SourceOp = 0 + SourceOp_StartSource SourceOp = 1 + SourceOp_UpdateSource SourceOp = 2 + SourceOp_StopSource SourceOp = 3 +) + +var SourceOp_name = map[int32]string{ + 0: "InvalidSourceOp", + 1: "StartSource", + 2: "UpdateSource", + 3: "StopSource", +} + +var SourceOp_value = map[string]int32{ + "InvalidSourceOp": 0, + "StartSource": 1, + "UpdateSource": 2, + "StopSource": 3, +} + +func (x SourceOp) String() string { + return proto.EnumName(SourceOp_name, int32(x)) +} + +func (SourceOp) EnumDescriptor() ([]byte, []int) { + return fileDescriptor_f9bef11f2a341f03, []int{0} +} + type MigrateWorkerRelayRequest struct { BinlogName string `protobuf:"bytes,1,opt,name=BinlogName,proto3" json:"BinlogName,omitempty"` BinlogPos uint32 `protobuf:"varint,2,opt,name=BinlogPos,proto3" json:"BinlogPos,omitempty"` @@ -1928,6 +1959,110 @@ func (m *CheckTaskResponse) GetMsg() string { return "" } +type OperateSourceRequest struct { + Op SourceOp `protobuf:"varint,1,opt,name=op,proto3,enum=pb.SourceOp" json:"op,omitempty"` + Config string `protobuf:"bytes,2,opt,name=config,proto3" json:"config,omitempty"` +} + +func (m *OperateSourceRequest) Reset() { *m = OperateSourceRequest{} } +func (m *OperateSourceRequest) String() string { return proto.CompactTextString(m) } +func (*OperateSourceRequest) ProtoMessage() {} +func (*OperateSourceRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_f9bef11f2a341f03, []int{31} +} +func (m *OperateSourceRequest) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *OperateSourceRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_OperateSourceRequest.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *OperateSourceRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_OperateSourceRequest.Merge(m, src) +} +func (m *OperateSourceRequest) XXX_Size() int { + return m.Size() +} +func (m *OperateSourceRequest) XXX_DiscardUnknown() { + xxx_messageInfo_OperateSourceRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_OperateSourceRequest proto.InternalMessageInfo + +func (m *OperateSourceRequest) GetOp() SourceOp { + if m != nil { + return m.Op + } + return SourceOp_InvalidSourceOp +} + +func (m *OperateSourceRequest) GetConfig() string { + if m != nil { + return m.Config + } + return "" +} + +type OperateSourceResponse struct { + Result bool `protobuf:"varint,1,opt,name=result,proto3" json:"result,omitempty"` + Msg string `protobuf:"bytes,2,opt,name=msg,proto3" json:"msg,omitempty"` +} + +func (m *OperateSourceResponse) Reset() { *m = OperateSourceResponse{} } +func (m *OperateSourceResponse) String() string { return proto.CompactTextString(m) } +func (*OperateSourceResponse) ProtoMessage() {} +func (*OperateSourceResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_f9bef11f2a341f03, []int{32} +} +func (m *OperateSourceResponse) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *OperateSourceResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_OperateSourceResponse.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *OperateSourceResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_OperateSourceResponse.Merge(m, src) +} +func (m *OperateSourceResponse) XXX_Size() int { + return m.Size() +} +func (m *OperateSourceResponse) XXX_DiscardUnknown() { + xxx_messageInfo_OperateSourceResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_OperateSourceResponse proto.InternalMessageInfo + +func (m *OperateSourceResponse) GetResult() bool { + if m != nil { + return m.Result + } + return false +} + +func (m *OperateSourceResponse) GetMsg() string { + if m != nil { + return m.Msg + } + return "" +} + type RegisterWorkerRequest struct { Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` Address string `protobuf:"bytes,2,opt,name=address,proto3" json:"address,omitempty"` @@ -1937,7 +2072,7 @@ func (m *RegisterWorkerRequest) Reset() { *m = RegisterWorkerRequest{} } func (m *RegisterWorkerRequest) String() string { return proto.CompactTextString(m) } func (*RegisterWorkerRequest) ProtoMessage() {} func (*RegisterWorkerRequest) Descriptor() ([]byte, []int) { - return fileDescriptor_f9bef11f2a341f03, []int{31} + return fileDescriptor_f9bef11f2a341f03, []int{33} } func (m *RegisterWorkerRequest) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1989,7 +2124,7 @@ func (m *RegisterWorkerResponse) Reset() { *m = RegisterWorkerResponse{} func (m *RegisterWorkerResponse) String() string { return proto.CompactTextString(m) } func (*RegisterWorkerResponse) ProtoMessage() {} func (*RegisterWorkerResponse) Descriptor() ([]byte, []int) { - return fileDescriptor_f9bef11f2a341f03, []int{32} + return fileDescriptor_f9bef11f2a341f03, []int{34} } func (m *RegisterWorkerResponse) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2041,7 +2176,7 @@ func (m *OfflineWorkerRequest) Reset() { *m = OfflineWorkerRequest{} } func (m *OfflineWorkerRequest) String() string { return proto.CompactTextString(m) } func (*OfflineWorkerRequest) ProtoMessage() {} func (*OfflineWorkerRequest) Descriptor() ([]byte, []int) { - return fileDescriptor_f9bef11f2a341f03, []int{33} + return fileDescriptor_f9bef11f2a341f03, []int{35} } func (m *OfflineWorkerRequest) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2093,7 +2228,7 @@ func (m *OfflineWorkerResponse) Reset() { *m = OfflineWorkerResponse{} } func (m *OfflineWorkerResponse) String() string { return proto.CompactTextString(m) } func (*OfflineWorkerResponse) ProtoMessage() {} func (*OfflineWorkerResponse) Descriptor() ([]byte, []int) { - return fileDescriptor_f9bef11f2a341f03, []int{34} + return fileDescriptor_f9bef11f2a341f03, []int{36} } func (m *OfflineWorkerResponse) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -2137,6 +2272,7 @@ func (m *OfflineWorkerResponse) GetMsg() string { } func init() { + proto.RegisterEnum("pb.SourceOp", SourceOp_name, SourceOp_value) proto.RegisterType((*MigrateWorkerRelayRequest)(nil), "pb.MigrateWorkerRelayRequest") proto.RegisterType((*UpdateWorkerRelayConfigRequest)(nil), "pb.UpdateWorkerRelayConfigRequest") proto.RegisterType((*StartTaskRequest)(nil), "pb.StartTaskRequest") @@ -2168,6 +2304,8 @@ func init() { proto.RegisterType((*PurgeWorkerRelayResponse)(nil), "pb.PurgeWorkerRelayResponse") proto.RegisterType((*CheckTaskRequest)(nil), "pb.CheckTaskRequest") proto.RegisterType((*CheckTaskResponse)(nil), "pb.CheckTaskResponse") + proto.RegisterType((*OperateSourceRequest)(nil), "pb.OperateSourceRequest") + proto.RegisterType((*OperateSourceResponse)(nil), "pb.OperateSourceResponse") proto.RegisterType((*RegisterWorkerRequest)(nil), "pb.RegisterWorkerRequest") proto.RegisterType((*RegisterWorkerResponse)(nil), "pb.RegisterWorkerResponse") proto.RegisterType((*OfflineWorkerRequest)(nil), "pb.OfflineWorkerRequest") @@ -2177,96 +2315,100 @@ func init() { func init() { proto.RegisterFile("dmmaster.proto", fileDescriptor_f9bef11f2a341f03) } var fileDescriptor_f9bef11f2a341f03 = []byte{ - // 1413 bytes of a gzipped FileDescriptorProto + // 1483 bytes of a gzipped FileDescriptorProto 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xbc, 0x58, 0x4d, 0x6f, 0xdb, 0x46, - 0x13, 0x16, 0x25, 0x5b, 0x89, 0xc6, 0x89, 0x61, 0x6f, 0x64, 0x89, 0x62, 0x1c, 0xc5, 0x2f, 0xdf, - 0x22, 0x30, 0x7a, 0xb0, 0x6b, 0xa7, 0x27, 0x03, 0x01, 0x1a, 0x5b, 0x4e, 0x6b, 0x40, 0xae, 0x6d, - 0xaa, 0x46, 0x91, 0x4b, 0x01, 0x4a, 0x5a, 0xc9, 0x84, 0x28, 0x92, 0x26, 0x29, 0x3b, 0x6e, 0x51, - 0x14, 0xe8, 0xa1, 0x97, 0x1e, 0xda, 0xa2, 0x87, 0x9c, 0xfb, 0x6f, 0x72, 0x0c, 0xd0, 0x4b, 0x8f, - 0x85, 0xdd, 0x1f, 0x52, 0xec, 0x87, 0xc8, 0xe5, 0x97, 0x62, 0xb9, 0x80, 0x6e, 0xdc, 0x1d, 0xee, - 0x33, 0xcf, 0xcc, 0xee, 0xce, 0x3c, 0x24, 0x2c, 0x76, 0x87, 0x43, 0xdd, 0xf3, 0xb1, 0xbb, 0xe1, - 0xb8, 0xb6, 0x6f, 0xa3, 0xbc, 0xd3, 0x56, 0x16, 0xbb, 0xc3, 0x4b, 0xdb, 0x1d, 0x8c, 0xe7, 0x94, - 0xd5, 0xbe, 0x6d, 0xf7, 0x4d, 0xbc, 0xa9, 0x3b, 0xc6, 0xa6, 0x6e, 0x59, 0xb6, 0xaf, 0xfb, 0x86, - 0x6d, 0x79, 0xcc, 0xaa, 0x9e, 0x43, 0xed, 0xd0, 0xe8, 0xbb, 0xba, 0x8f, 0xbf, 0xa6, 0x8b, 0x34, - 0x6c, 0xea, 0x57, 0x1a, 0x3e, 0x1f, 0x61, 0xcf, 0x47, 0x75, 0x80, 0x5d, 0xc3, 0x32, 0xed, 0xfe, - 0x97, 0xfa, 0x10, 0xcb, 0xd2, 0x9a, 0xb4, 0x5e, 0xd2, 0x84, 0x19, 0xb4, 0x0a, 0x25, 0x36, 0x3a, - 0xb6, 0x3d, 0x39, 0xbf, 0x26, 0xad, 0x3f, 0xd4, 0xc2, 0x09, 0x54, 0x81, 0xa2, 0x67, 0x8f, 0xdc, - 0x0e, 0x96, 0x0b, 0x74, 0x25, 0x1f, 0xa9, 0xc7, 0x50, 0x3f, 0x75, 0xba, 0x51, 0x8f, 0x7b, 0xb6, - 0xd5, 0x33, 0xfa, 0x63, 0xbf, 0x15, 0x28, 0x76, 0xe8, 0x04, 0xf7, 0xc9, 0x47, 0x02, 0x62, 0x3e, - 0x82, 0xf8, 0x19, 0x2c, 0xb5, 0x7c, 0xdd, 0xf5, 0xbf, 0xd2, 0xbd, 0xc1, 0x18, 0x03, 0xc1, 0x9c, - 0xaf, 0x7b, 0x03, 0x8e, 0x40, 0x9f, 0x91, 0x0c, 0xf7, 0xd8, 0x0a, 0xc2, 0xb6, 0xb0, 0x5e, 0xd2, - 0xc6, 0x43, 0xf5, 0x1c, 0x96, 0x05, 0x04, 0xcf, 0xb1, 0x2d, 0x0f, 0x13, 0x77, 0x2e, 0xf6, 0x46, - 0xa6, 0x4f, 0x41, 0xee, 0x6b, 0x7c, 0x84, 0x96, 0xa0, 0x30, 0xf4, 0xfa, 0x9c, 0x03, 0x79, 0x44, - 0xdb, 0x21, 0x70, 0x61, 0xad, 0xb0, 0xbe, 0xb0, 0x2d, 0x6f, 0x38, 0xed, 0x8d, 0x3d, 0x7b, 0x38, - 0xb4, 0xad, 0x71, 0x94, 0x0c, 0x34, 0x74, 0xf9, 0x1c, 0x6a, 0x2c, 0x0d, 0x87, 0x74, 0x07, 0x6f, - 0x95, 0x01, 0xf5, 0x0a, 0x94, 0xb4, 0x45, 0x53, 0x13, 0xde, 0x8a, 0x13, 0xae, 0x12, 0xc2, 0x27, - 0x23, 0xec, 0x5e, 0xb5, 0x7c, 0xdd, 0x1f, 0x79, 0x49, 0xbe, 0xdf, 0x00, 0x3a, 0x72, 0x30, 0x39, - 0x29, 0x62, 0x9a, 0x15, 0xc8, 0xdb, 0x0e, 0x75, 0xb7, 0xb8, 0x0d, 0x04, 0x83, 0x18, 0x8f, 0x1c, - 0x2d, 0x6f, 0x3b, 0x64, 0x0b, 0x2c, 0x72, 0x70, 0x98, 0x5f, 0xfa, 0x2c, 0x6e, 0x41, 0x21, 0xba, - 0x05, 0xbf, 0x49, 0xf0, 0x28, 0xe2, 0x80, 0x07, 0x35, 0xc9, 0x43, 0x18, 0x70, 0x3e, 0x2d, 0xe0, - 0x42, 0x18, 0xf0, 0xa7, 0xa1, 0xdf, 0x39, 0x1a, 0xb0, 0x42, 0xa0, 0xb8, 0xbf, 0xd6, 0xa8, 0x2d, - 0xba, 0x0c, 0x39, 0xbd, 0x84, 0x65, 0x96, 0xee, 0xbb, 0x9f, 0x2c, 0x17, 0x90, 0x08, 0x31, 0x93, - 0xa3, 0xf5, 0x0a, 0x2a, 0xc2, 0x56, 0x36, 0x0d, 0xcf, 0x17, 0xb8, 0x5b, 0xe1, 0x5d, 0x4e, 0x6c, - 0x49, 0x8c, 0xfb, 0x05, 0x54, 0x13, 0x38, 0xb3, 0x38, 0x6a, 0xfb, 0xb0, 0x42, 0xed, 0xfb, 0xae, - 0x6b, 0xbb, 0x77, 0xa7, 0xef, 0xf3, 0x34, 0x08, 0x30, 0x53, 0xb3, 0xff, 0x24, 0xce, 0xbe, 0x12, - 0xb0, 0xa7, 0xb0, 0x49, 0xf2, 0x7b, 0xf0, 0xa8, 0x75, 0x66, 0x5f, 0x36, 0x1a, 0xcd, 0xa6, 0xdd, - 0x19, 0x78, 0x77, 0x3b, 0x35, 0x3f, 0x4b, 0x70, 0x8f, 0x23, 0xa0, 0x45, 0xc8, 0x1f, 0x34, 0xf8, - 0xba, 0xfc, 0x41, 0x23, 0x40, 0xca, 0x0b, 0x48, 0x65, 0x98, 0xb7, 0x2f, 0x2d, 0xec, 0xf2, 0x23, - 0xcf, 0x06, 0xe4, 0xcd, 0x46, 0xa3, 0xc9, 0x4e, 0x7c, 0x49, 0xa3, 0xcf, 0xb4, 0x86, 0x5e, 0x59, - 0x1d, 0xdc, 0x95, 0xe7, 0xe9, 0x2c, 0x1f, 0x21, 0x05, 0xee, 0x8f, 0x2c, 0x6e, 0x29, 0x52, 0x4b, - 0x30, 0x56, 0x3b, 0x50, 0x8e, 0x86, 0x34, 0x75, 0x1a, 0xff, 0x07, 0xf3, 0x26, 0x59, 0xca, 0x93, - 0xb8, 0x40, 0x92, 0xc8, 0xe1, 0x34, 0x66, 0x51, 0x7f, 0x92, 0xa0, 0x7c, 0x6a, 0x91, 0xe7, 0xb1, - 0x81, 0x67, 0x2e, 0x1e, 0xbf, 0x0a, 0x0f, 0x5c, 0xec, 0x98, 0x7a, 0x07, 0x1f, 0xd1, 0x90, 0x99, - 0x9b, 0xc8, 0x5c, 0x76, 0x99, 0x41, 0x6b, 0xb0, 0xd0, 0xb3, 0xdd, 0x0e, 0xd6, 0xf0, 0xd0, 0xbe, - 0xc0, 0xf2, 0x1c, 0x25, 0x2e, 0x4e, 0xa9, 0x23, 0x58, 0x89, 0xf1, 0xb8, 0xcb, 0xa5, 0x65, 0x3d, - 0xf8, 0x16, 0x97, 0x96, 0xbf, 0xa8, 0xfe, 0x21, 0x41, 0x6d, 0xd7, 0xc5, 0xfa, 0x80, 0xbd, 0x10, - 0x4b, 0x82, 0x10, 0x90, 0x14, 0x0d, 0x28, 0xed, 0x38, 0xd0, 0x14, 0x91, 0x60, 0x08, 0xc4, 0x41, - 0x83, 0x9f, 0x8a, 0xc8, 0x1c, 0x41, 0xc4, 0x6f, 0x70, 0xa7, 0xd1, 0x68, 0xf2, 0x24, 0x8c, 0x87, - 0xd4, 0xd7, 0xc0, 0x70, 0x88, 0x65, 0x9e, 0x59, 0xf8, 0x50, 0xfd, 0x16, 0x94, 0x34, 0x8a, 0x33, - 0x29, 0x6a, 0x3b, 0x50, 0x6f, 0x5d, 0x1a, 0x7e, 0xe7, 0x4c, 0x90, 0x0d, 0xac, 0x0b, 0x7e, 0x30, - 0x47, 0xea, 0x0f, 0xf0, 0x34, 0x73, 0xed, 0x4c, 0xc8, 0x6b, 0x50, 0xe3, 0xbd, 0x26, 0x45, 0x66, - 0x3d, 0x16, 0x3a, 0x1c, 0xbd, 0x19, 0xd4, 0xca, 0x5b, 0x5c, 0x76, 0x8d, 0x78, 0x2b, 0x81, 0x92, - 0x06, 0xca, 0x03, 0x9a, 0x88, 0x7a, 0xfb, 0xc6, 0xb9, 0x1d, 0x6f, 0x9c, 0xb2, 0xd0, 0x38, 0x23, - 0x1e, 0x43, 0x66, 0xef, 0x24, 0x58, 0xfe, 0x42, 0xb7, 0xba, 0x26, 0x6e, 0x9d, 0x34, 0xbd, 0x49, - 0xc5, 0xbb, 0x46, 0x49, 0xe6, 0x29, 0xc9, 0x12, 0x01, 0x6e, 0x9d, 0x34, 0x43, 0xf5, 0xa0, 0xbb, - 0xfd, 0xf1, 0xfd, 0xa5, 0xcf, 0x44, 0x70, 0xb6, 0x03, 0xc1, 0x39, 0x47, 0x71, 0xc2, 0x09, 0x41, - 0x1e, 0xce, 0x8b, 0xf2, 0x90, 0xc8, 0x58, 0xef, 0xdc, 0x3c, 0xd6, 0x7d, 0x1f, 0xbb, 0x96, 0x5c, - 0x64, 0x32, 0x36, 0x9c, 0x21, 0xa5, 0xcf, 0x3b, 0xd3, 0xdd, 0xae, 0x61, 0xf5, 0xe5, 0x7b, 0x34, - 0x1d, 0xc1, 0x98, 0xb4, 0x6f, 0x31, 0x92, 0x99, 0x1c, 0x96, 0xb7, 0x12, 0x54, 0x8f, 0x47, 0x6e, - 0x3f, 0xed, 0xac, 0x64, 0xd7, 0x01, 0x05, 0xee, 0x1b, 0x96, 0xde, 0xf1, 0x8d, 0x0b, 0xcc, 0x37, - 0x35, 0x18, 0xd3, 0x1a, 0x61, 0x0c, 0x99, 0x10, 0x2f, 0x68, 0xf4, 0x99, 0xbc, 0xdf, 0x33, 0x4c, - 0x4c, 0xb7, 0x84, 0xa5, 0x32, 0x18, 0xd3, 0x4c, 0x8e, 0xda, 0x0d, 0xc3, 0x0d, 0x32, 0x49, 0x47, - 0xea, 0x1b, 0x90, 0x93, 0xc4, 0x66, 0x92, 0x93, 0x67, 0xb0, 0xb4, 0x77, 0x86, 0x3b, 0x83, 0x0f, - 0x08, 0x31, 0xf5, 0x05, 0x2c, 0x0b, 0xef, 0x4d, 0x4b, 0x8d, 0x28, 0x0f, 0x0d, 0xf7, 0x0d, 0x52, - 0x19, 0xc6, 0x4c, 0x26, 0x2a, 0x0f, 0xbd, 0xdb, 0x75, 0xb1, 0xe7, 0x71, 0x88, 0xf1, 0x50, 0xdd, - 0x85, 0x4a, 0x1c, 0x66, 0x6a, 0x2a, 0x0d, 0x28, 0x1f, 0xf5, 0x7a, 0xa6, 0x61, 0xe1, 0xff, 0xc2, - 0xe4, 0x25, 0xac, 0xc4, 0x50, 0xa6, 0x25, 0xb2, 0xfd, 0xcb, 0x03, 0x28, 0xb2, 0x62, 0x89, 0x5e, - 0x43, 0x29, 0xf8, 0x4c, 0x42, 0x65, 0x7a, 0x5f, 0x63, 0xdf, 0x5d, 0xca, 0x4a, 0x6c, 0x96, 0xb9, - 0x53, 0x9f, 0xfe, 0xf8, 0xe7, 0x3f, 0xbf, 0xe7, 0x6b, 0x6a, 0x99, 0x7c, 0x87, 0x7a, 0x9b, 0x17, - 0x5b, 0xba, 0xe9, 0x9c, 0xe9, 0x5b, 0x9b, 0x64, 0xd3, 0xbc, 0x1d, 0xe9, 0x63, 0xd4, 0x83, 0x05, - 0x41, 0xfd, 0xa3, 0x8a, 0x50, 0x65, 0x44, 0xf8, 0x6a, 0x62, 0x9e, 0x3b, 0x78, 0x46, 0x1d, 0xac, - 0x29, 0x8f, 0xd3, 0x1c, 0x6c, 0x7e, 0x47, 0xd2, 0xf4, 0x3d, 0xf1, 0xf3, 0x02, 0x20, 0xd4, 0xe3, - 0x88, 0xb2, 0x4d, 0x48, 0x7c, 0xa5, 0x12, 0x9f, 0xe6, 0x4e, 0x72, 0xc8, 0x84, 0x05, 0x41, 0xba, - 0x22, 0x25, 0xa6, 0x65, 0x05, 0xb1, 0xaa, 0x3c, 0x4e, 0xb5, 0x71, 0xa4, 0x8f, 0x28, 0xdd, 0x3a, - 0x5a, 0x8d, 0xd1, 0xf5, 0xe8, 0xab, 0x9c, 0x2f, 0xda, 0x07, 0x08, 0xa5, 0x26, 0xaa, 0x45, 0xa5, - 0xa7, 0xe8, 0x4b, 0x49, 0x33, 0x05, 0xa4, 0xf7, 0xe0, 0x81, 0xa8, 0xdf, 0x10, 0x4d, 0x62, 0x8a, - 0x48, 0x55, 0xe4, 0xa4, 0x21, 0x00, 0x79, 0x05, 0x0f, 0x23, 0xb2, 0x08, 0xd1, 0x97, 0xd3, 0x14, - 0x9b, 0x52, 0x4b, 0xb1, 0x04, 0x38, 0xa7, 0xe3, 0x0f, 0x22, 0xf1, 0x13, 0x16, 0x3d, 0x09, 0x33, - 0x9e, 0xf2, 0x3d, 0xac, 0xd4, 0xb3, 0xcc, 0x01, 0xec, 0x6b, 0xa8, 0x66, 0xfc, 0x55, 0x40, 0x6a, - 0xb8, 0x38, 0xeb, 0x97, 0x83, 0x92, 0x59, 0x82, 0x18, 0xe3, 0xa4, 0xea, 0x61, 0x8c, 0x33, 0x05, - 0x1b, 0x63, 0x9c, 0x2d, 0x96, 0xd4, 0x1c, 0x39, 0x89, 0x61, 0x6b, 0x61, 0x27, 0x31, 0xd1, 0x34, - 0xd9, 0x49, 0x4c, 0x76, 0x20, 0x35, 0x87, 0xba, 0x50, 0xcd, 0xd0, 0x34, 0x2c, 0xe0, 0xc9, 0x62, - 0x49, 0xf9, 0xff, 0xc4, 0x77, 0x84, 0xb4, 0x56, 0x92, 0x1a, 0x83, 0x5e, 0x9d, 0x27, 0xc2, 0x4d, - 0x4c, 0x36, 0x2a, 0x16, 0x7f, 0xb6, 0x3c, 0x51, 0x73, 0xe8, 0x08, 0x96, 0xe2, 0xcd, 0x04, 0xd1, - 0x3b, 0x93, 0xd1, 0xfb, 0x94, 0xd5, 0x74, 0xa3, 0x00, 0x88, 0x92, 0xff, 0xb2, 0x18, 0xcf, 0xcc, - 0x7f, 0x5c, 0x13, 0x37, 0x7e, 0x07, 0x4a, 0x41, 0x33, 0x61, 0xe5, 0x2e, 0xde, 0x83, 0x58, 0xb9, - 0x4b, 0x74, 0x1c, 0x35, 0x87, 0x3e, 0x0f, 0x7e, 0x97, 0x1c, 0x5e, 0x79, 0xe7, 0x26, 0xc3, 0x66, - 0x65, 0x4d, 0x98, 0x88, 0x94, 0xb5, 0xc8, 0x7c, 0x00, 0x74, 0x00, 0x8b, 0xd1, 0x5e, 0xc2, 0xea, - 0x40, 0x6a, 0x9b, 0x62, 0x75, 0x20, 0xbd, 0xf5, 0xb0, 0x2b, 0x1c, 0x69, 0x06, 0xec, 0x0a, 0xa7, - 0x75, 0x19, 0x76, 0x85, 0x53, 0x3b, 0x87, 0x9a, 0xdb, 0x95, 0xdf, 0x5d, 0xd7, 0xa5, 0xf7, 0xd7, - 0x75, 0xe9, 0xef, 0xeb, 0xba, 0xf4, 0xeb, 0x4d, 0x3d, 0xf7, 0xfe, 0xa6, 0x9e, 0xfb, 0xeb, 0xa6, - 0x9e, 0x6b, 0x17, 0xe9, 0x5f, 0xc5, 0xe7, 0xff, 0x06, 0x00, 0x00, 0xff, 0xff, 0xb0, 0x45, 0xe1, - 0x29, 0x99, 0x14, 0x00, 0x00, + 0x13, 0x16, 0x29, 0xdb, 0xb1, 0xc6, 0x8e, 0x23, 0x6f, 0x64, 0x99, 0x62, 0x1c, 0xc5, 0x2f, 0xdf, + 0x22, 0x30, 0x72, 0x88, 0x1b, 0xa7, 0xa7, 0x00, 0x01, 0x1a, 0x5b, 0x0e, 0x6a, 0x40, 0xa9, 0x1d, + 0xaa, 0x41, 0x91, 0x4b, 0x01, 0x4a, 0x5a, 0xc9, 0x84, 0x28, 0x92, 0x26, 0x29, 0x3b, 0x6e, 0x51, + 0x14, 0xe8, 0xa1, 0x97, 0x02, 0x45, 0x8b, 0x1e, 0x72, 0xee, 0xbf, 0xc9, 0x31, 0x40, 0x2f, 0x3d, + 0x16, 0x49, 0x7f, 0x48, 0xb1, 0x1f, 0x24, 0x97, 0x5f, 0x4a, 0xe4, 0x02, 0xba, 0x71, 0x77, 0xc8, + 0x67, 0x9e, 0x99, 0x9d, 0xdd, 0x79, 0x96, 0xb0, 0xd6, 0x1f, 0x8f, 0x0d, 0x3f, 0xc0, 0xde, 0x7d, + 0xd7, 0x73, 0x02, 0x07, 0xc9, 0x6e, 0x57, 0x5d, 0xeb, 0x8f, 0x2f, 0x1c, 0x6f, 0x14, 0xce, 0xa9, + 0x5b, 0x43, 0xc7, 0x19, 0x5a, 0x78, 0xd7, 0x70, 0xcd, 0x5d, 0xc3, 0xb6, 0x9d, 0xc0, 0x08, 0x4c, + 0xc7, 0xf6, 0x99, 0x55, 0x3b, 0x83, 0xc6, 0x33, 0x73, 0xe8, 0x19, 0x01, 0xfe, 0x9a, 0x7e, 0xa4, + 0x63, 0xcb, 0xb8, 0xd4, 0xf1, 0xd9, 0x04, 0xfb, 0x01, 0x6a, 0x02, 0xec, 0x9b, 0xb6, 0xe5, 0x0c, + 0xbf, 0x34, 0xc6, 0x58, 0x91, 0xb6, 0xa5, 0x9d, 0x8a, 0x2e, 0xcc, 0xa0, 0x2d, 0xa8, 0xb0, 0xd1, + 0x89, 0xe3, 0x2b, 0xf2, 0xb6, 0xb4, 0x73, 0x5d, 0x8f, 0x27, 0x50, 0x1d, 0x96, 0x7c, 0x67, 0xe2, + 0xf5, 0xb0, 0x52, 0xa6, 0x5f, 0xf2, 0x91, 0x76, 0x02, 0xcd, 0x17, 0x6e, 0x3f, 0xe9, 0xf1, 0xc0, + 0xb1, 0x07, 0xe6, 0x30, 0xf4, 0x5b, 0x87, 0xa5, 0x1e, 0x9d, 0xe0, 0x3e, 0xf9, 0x48, 0x40, 0x94, + 0x13, 0x88, 0x9f, 0x43, 0xb5, 0x13, 0x18, 0x5e, 0xf0, 0x95, 0xe1, 0x8f, 0x42, 0x0c, 0x04, 0x0b, + 0x81, 0xe1, 0x8f, 0x38, 0x02, 0x7d, 0x46, 0x0a, 0x5c, 0x63, 0x5f, 0x10, 0xb6, 0xe5, 0x9d, 0x8a, + 0x1e, 0x0e, 0xb5, 0x33, 0x58, 0x17, 0x10, 0x7c, 0xd7, 0xb1, 0x7d, 0x4c, 0xdc, 0x79, 0xd8, 0x9f, + 0x58, 0x01, 0x05, 0x59, 0xd6, 0xf9, 0x08, 0x55, 0xa1, 0x3c, 0xf6, 0x87, 0x9c, 0x03, 0x79, 0x44, + 0x7b, 0x31, 0x70, 0x79, 0xbb, 0xbc, 0xb3, 0xb2, 0xa7, 0xdc, 0x77, 0xbb, 0xf7, 0x0f, 0x9c, 0xf1, + 0xd8, 0xb1, 0xc3, 0x28, 0x19, 0x68, 0xec, 0xf2, 0x21, 0x34, 0x58, 0x1a, 0x9e, 0xd1, 0x15, 0xfc, + 0xa8, 0x0c, 0x68, 0x97, 0xa0, 0xe6, 0x7d, 0x34, 0x33, 0xe1, 0x07, 0x69, 0xc2, 0x9b, 0x84, 0xf0, + 0xf3, 0x09, 0xf6, 0x2e, 0x3b, 0x81, 0x11, 0x4c, 0xfc, 0x2c, 0xdf, 0x6f, 0x00, 0x1d, 0xbb, 0x98, + 0x54, 0x8a, 0x98, 0x66, 0x15, 0x64, 0xc7, 0xa5, 0xee, 0xd6, 0xf6, 0x80, 0x60, 0x10, 0xe3, 0xb1, + 0xab, 0xcb, 0x8e, 0x4b, 0x96, 0xc0, 0x26, 0x85, 0xc3, 0xfc, 0xd2, 0x67, 0x71, 0x09, 0xca, 0xc9, + 0x25, 0xf8, 0x4d, 0x82, 0x9b, 0x09, 0x07, 0x3c, 0xa8, 0x69, 0x1e, 0xe2, 0x80, 0xe5, 0xbc, 0x80, + 0xcb, 0x71, 0xc0, 0x9f, 0xc5, 0x7e, 0x17, 0x68, 0xc0, 0x2a, 0x81, 0xe2, 0xfe, 0x3a, 0x93, 0xae, + 0xe8, 0x32, 0xe6, 0xf4, 0x04, 0xd6, 0x59, 0xba, 0xaf, 0x5e, 0x59, 0x1e, 0x20, 0x11, 0x62, 0x2e, + 0xa5, 0xf5, 0x14, 0xea, 0xc2, 0x52, 0xb6, 0x4d, 0x3f, 0x10, 0xb8, 0xdb, 0xf1, 0x5e, 0xce, 0x2c, + 0x49, 0x8a, 0xfb, 0x39, 0x6c, 0x66, 0x70, 0xe6, 0x51, 0x6a, 0x87, 0xb0, 0x41, 0xed, 0x87, 0x9e, + 0xe7, 0x78, 0x57, 0xa7, 0x1f, 0xf0, 0x34, 0x08, 0x30, 0x33, 0xb3, 0xff, 0x34, 0xcd, 0xbe, 0x1e, + 0xb1, 0xa7, 0xb0, 0x59, 0xf2, 0x07, 0x70, 0xb3, 0x73, 0xea, 0x5c, 0xb4, 0x5a, 0xed, 0xb6, 0xd3, + 0x1b, 0xf9, 0x57, 0xab, 0x9a, 0x9f, 0x25, 0xb8, 0xc6, 0x11, 0xd0, 0x1a, 0xc8, 0x47, 0x2d, 0xfe, + 0x9d, 0x7c, 0xd4, 0x8a, 0x90, 0x64, 0x01, 0xa9, 0x06, 0x8b, 0xce, 0x85, 0x8d, 0x3d, 0x5e, 0xf2, + 0x6c, 0x40, 0xde, 0x6c, 0xb5, 0xda, 0xac, 0xe2, 0x2b, 0x3a, 0x7d, 0xa6, 0x67, 0xe8, 0xa5, 0xdd, + 0xc3, 0x7d, 0x65, 0x91, 0xce, 0xf2, 0x11, 0x52, 0x61, 0x79, 0x62, 0x73, 0xcb, 0x12, 0xb5, 0x44, + 0x63, 0xad, 0x07, 0xb5, 0x64, 0x48, 0x33, 0xa7, 0xf1, 0x7f, 0xb0, 0x68, 0x91, 0x4f, 0x79, 0x12, + 0x57, 0x48, 0x12, 0x39, 0x9c, 0xce, 0x2c, 0xda, 0x4f, 0x12, 0xd4, 0x5e, 0xd8, 0xe4, 0x39, 0x34, + 0xf0, 0xcc, 0xa5, 0xe3, 0xd7, 0x60, 0xd5, 0xc3, 0xae, 0x65, 0xf4, 0xf0, 0x31, 0x0d, 0x99, 0xb9, + 0x49, 0xcc, 0x15, 0x1f, 0x33, 0x68, 0x1b, 0x56, 0x06, 0x8e, 0xd7, 0xc3, 0x3a, 0x1e, 0x3b, 0xe7, + 0x58, 0x59, 0xa0, 0xc4, 0xc5, 0x29, 0x6d, 0x02, 0x1b, 0x29, 0x1e, 0x57, 0xd9, 0xb4, 0xac, 0x07, + 0x7f, 0xc4, 0xa6, 0xe5, 0x2f, 0x6a, 0x7f, 0x48, 0xd0, 0xd8, 0xf7, 0xb0, 0x31, 0x62, 0x2f, 0xa4, + 0x92, 0x20, 0x04, 0x24, 0x25, 0x03, 0xca, 0x2b, 0x07, 0x9a, 0x22, 0x12, 0x0c, 0x81, 0x38, 0x6a, + 0xf1, 0xaa, 0x48, 0xcc, 0x11, 0x44, 0xfc, 0x0a, 0xf7, 0x5a, 0xad, 0x36, 0x4f, 0x42, 0x38, 0xa4, + 0xbe, 0x46, 0xa6, 0x4b, 0x2c, 0x8b, 0xcc, 0xc2, 0x87, 0xda, 0xb7, 0xa0, 0xe6, 0x51, 0x9c, 0xcb, + 0xa1, 0xf6, 0x08, 0x9a, 0x9d, 0x0b, 0x33, 0xe8, 0x9d, 0x0a, 0xb2, 0x81, 0x75, 0xc1, 0x0f, 0xe6, + 0x48, 0xfb, 0x01, 0xee, 0x14, 0x7e, 0x3b, 0x17, 0xf2, 0x3a, 0x34, 0x78, 0xaf, 0xc9, 0x91, 0x59, + 0xb7, 0x84, 0x0e, 0x47, 0x77, 0x06, 0xb5, 0xf2, 0x16, 0x57, 0x7c, 0x46, 0xbc, 0x96, 0x40, 0xcd, + 0x03, 0xe5, 0x01, 0x4d, 0x45, 0xfd, 0xf8, 0xc6, 0xb9, 0x97, 0x6e, 0x9c, 0x8a, 0xd0, 0x38, 0x13, + 0x1e, 0x63, 0x66, 0x6f, 0x24, 0x58, 0xff, 0xc2, 0xb0, 0xfb, 0x16, 0xee, 0x3c, 0x6f, 0xfb, 0xd3, + 0x0e, 0xef, 0x06, 0x25, 0x29, 0x53, 0x92, 0x15, 0x02, 0xdc, 0x79, 0xde, 0x8e, 0xd5, 0x83, 0xe1, + 0x0d, 0xc3, 0xfd, 0x4b, 0x9f, 0x89, 0xe0, 0xec, 0x46, 0x82, 0x73, 0x81, 0xe2, 0xc4, 0x13, 0x82, + 0x3c, 0x5c, 0x14, 0xe5, 0x21, 0x91, 0xb1, 0xfe, 0x99, 0x75, 0x62, 0x04, 0x01, 0xf6, 0x6c, 0x65, + 0x89, 0xc9, 0xd8, 0x78, 0x86, 0x1c, 0x7d, 0xfe, 0xa9, 0xe1, 0xf5, 0x4d, 0x7b, 0xa8, 0x5c, 0xa3, + 0xe9, 0x88, 0xc6, 0xa4, 0x7d, 0x8b, 0x91, 0xcc, 0xa5, 0x58, 0x5e, 0x4b, 0xb0, 0x79, 0x32, 0xf1, + 0x86, 0x79, 0xb5, 0x52, 0x7c, 0x0e, 0xa8, 0xb0, 0x6c, 0xda, 0x46, 0x2f, 0x30, 0xcf, 0x31, 0x5f, + 0xd4, 0x68, 0x4c, 0xcf, 0x08, 0x73, 0xcc, 0x84, 0x78, 0x59, 0xa7, 0xcf, 0xe4, 0xfd, 0x81, 0x69, + 0x61, 0xba, 0x24, 0x2c, 0x95, 0xd1, 0x98, 0x66, 0x72, 0xd2, 0x6d, 0x99, 0x5e, 0x94, 0x49, 0x3a, + 0xd2, 0x5e, 0x81, 0x92, 0x25, 0x36, 0x97, 0x9c, 0xdc, 0x85, 0xea, 0xc1, 0x29, 0xee, 0x8d, 0x3e, + 0x20, 0xc4, 0xb4, 0xc7, 0xb0, 0x2e, 0xbc, 0x37, 0x2b, 0x35, 0xad, 0x0d, 0xb5, 0x50, 0x13, 0x52, + 0xc7, 0xa1, 0xab, 0x2d, 0x61, 0x33, 0xad, 0xd2, 0x3a, 0xa5, 0xe6, 0x78, 0x37, 0x71, 0xb5, 0x2e, + 0x27, 0xd4, 0xfa, 0x13, 0xd8, 0x48, 0xa1, 0xcd, 0x4c, 0xe8, 0x10, 0x36, 0x74, 0x3c, 0x34, 0xc9, + 0x51, 0x15, 0xa6, 0x66, 0xaa, 0x14, 0x32, 0xfa, 0x7d, 0x0f, 0xfb, 0x3e, 0x87, 0x08, 0x87, 0xda, + 0x3e, 0xd4, 0xd3, 0x30, 0x33, 0x53, 0x69, 0x41, 0xed, 0x78, 0x30, 0xb0, 0x4c, 0x1b, 0xff, 0x17, + 0x26, 0x24, 0x27, 0x49, 0x94, 0x59, 0x89, 0xdc, 0xd3, 0x61, 0x39, 0x4c, 0x3f, 0xba, 0x09, 0x37, + 0x8e, 0xec, 0x73, 0xc3, 0x32, 0xfb, 0xe1, 0x54, 0xb5, 0x84, 0x6e, 0xc0, 0x0a, 0xbd, 0xcd, 0xb1, + 0xa9, 0xaa, 0x84, 0xaa, 0xb0, 0xca, 0x44, 0x38, 0x9f, 0x91, 0xd1, 0x1a, 0x40, 0x27, 0x70, 0x5c, + 0x3e, 0x2e, 0xef, 0xfd, 0xb2, 0x0a, 0x4b, 0xac, 0x23, 0xa0, 0x97, 0x50, 0x89, 0xee, 0x82, 0xa8, + 0x46, 0x17, 0x3b, 0x75, 0xb9, 0x54, 0x37, 0x52, 0xb3, 0x2c, 0x04, 0xed, 0xce, 0x8f, 0x7f, 0xfe, + 0xf3, 0xbb, 0xdc, 0xd0, 0x6a, 0xe4, 0xb2, 0xed, 0xef, 0x9e, 0x3f, 0x30, 0x2c, 0xf7, 0xd4, 0x78, + 0xb0, 0x4b, 0x2a, 0xd3, 0x7f, 0x24, 0xdd, 0x43, 0x03, 0x58, 0x11, 0xae, 0x38, 0xa8, 0x2e, 0x1c, + 0xa5, 0x22, 0xfc, 0x66, 0x66, 0x9e, 0x3b, 0xb8, 0x4b, 0x1d, 0x6c, 0xab, 0xb7, 0xf2, 0x1c, 0xec, + 0x7e, 0x47, 0x52, 0xff, 0x3d, 0xf1, 0xf3, 0x18, 0x20, 0xbe, 0x74, 0x20, 0xca, 0x36, 0x73, 0x8f, + 0x51, 0xeb, 0xe9, 0x69, 0xee, 0xa4, 0x84, 0x2c, 0x58, 0x11, 0xf4, 0x39, 0x52, 0x53, 0x82, 0x5d, + 0x50, 0xe4, 0xea, 0xad, 0x5c, 0x1b, 0x47, 0xfa, 0x84, 0xd2, 0x6d, 0xa2, 0xad, 0x14, 0x5d, 0x9f, + 0xbe, 0xca, 0xf9, 0xa2, 0x43, 0x80, 0x58, 0x4f, 0xa3, 0x46, 0x52, 0x5f, 0x8b, 0xbe, 0xd4, 0x3c, + 0x53, 0x44, 0xfa, 0x00, 0x56, 0x45, 0x91, 0x8a, 0x68, 0x12, 0x73, 0x94, 0xb8, 0xaa, 0x64, 0x0d, + 0x11, 0xc8, 0x53, 0xb8, 0x9e, 0xd0, 0x7e, 0x88, 0xbe, 0x9c, 0x27, 0x4b, 0xd5, 0x46, 0x8e, 0x25, + 0xc2, 0x79, 0x11, 0xde, 0xfa, 0xc4, 0x7b, 0x3a, 0xba, 0x1d, 0x67, 0x3c, 0xe7, 0xd2, 0xaf, 0x36, + 0x8b, 0xcc, 0x11, 0xec, 0x4b, 0xd8, 0x2c, 0xf8, 0x75, 0x82, 0xb4, 0xf8, 0xe3, 0xa2, 0xff, 0x2a, + 0x6a, 0xe1, 0x39, 0xcb, 0x18, 0x67, 0xa5, 0x1d, 0x63, 0x5c, 0xa8, 0x4a, 0x19, 0xe3, 0x62, 0x45, + 0xa8, 0x95, 0x48, 0x25, 0xc6, 0xfd, 0x93, 0x55, 0x62, 0x46, 0x19, 0xb0, 0x4a, 0xcc, 0xb6, 0x59, + 0xad, 0x84, 0xfa, 0xb0, 0x59, 0x20, 0xdc, 0x58, 0xc0, 0xd3, 0x15, 0xa1, 0xfa, 0xff, 0xa9, 0xef, + 0x08, 0x69, 0xad, 0x67, 0x85, 0x14, 0xdd, 0x3a, 0xb7, 0x85, 0x9d, 0x98, 0xed, 0xc6, 0x2c, 0xfe, + 0x62, 0x0d, 0xa6, 0x95, 0xd0, 0x31, 0x54, 0xd3, 0x1d, 0x13, 0xd1, 0x3d, 0x53, 0xd0, 0xe0, 0xd5, + 0xad, 0x7c, 0xa3, 0x00, 0x88, 0xb2, 0x3f, 0xec, 0x18, 0xcf, 0xc2, 0x1f, 0x79, 0x53, 0x17, 0xfe, + 0x11, 0x54, 0xa2, 0x8e, 0xc9, 0x8e, 0xbb, 0x74, 0xa3, 0x65, 0xc7, 0x5d, 0xa6, 0xad, 0xb2, 0xed, + 0x92, 0x68, 0x70, 0x48, 0x14, 0x87, 0x89, 0x0e, 0xca, 0xb6, 0x4b, 0x6e, 0x37, 0xd4, 0x4a, 0xe8, + 0x08, 0xd6, 0x92, 0xed, 0x89, 0x1d, 0x03, 0xb9, 0x9d, 0x8f, 0x1d, 0x03, 0xf9, 0xdd, 0x8c, 0x53, + 0x12, 0xfb, 0x0b, 0xa7, 0x94, 0xd3, 0xb8, 0x38, 0xa5, 0xbc, 0x66, 0xa4, 0x95, 0xf6, 0x95, 0x37, + 0xef, 0x9a, 0xd2, 0xdb, 0x77, 0x4d, 0xe9, 0xef, 0x77, 0x4d, 0xe9, 0xd7, 0xf7, 0xcd, 0xd2, 0xdb, + 0xf7, 0xcd, 0xd2, 0x5f, 0xef, 0x9b, 0xa5, 0xee, 0x12, 0xfd, 0x73, 0xfa, 0xf0, 0xdf, 0x00, 0x00, + 0x00, 0xff, 0xff, 0x87, 0x52, 0xe0, 0x1c, 0x7d, 0x15, 0x00, 0x00, } // Reference imports to suppress errors if they are not otherwise used. @@ -2306,8 +2448,8 @@ type MasterClient interface { MigrateWorkerRelay(ctx context.Context, in *MigrateWorkerRelayRequest, opts ...grpc.CallOption) (*CommonWorkerResponse, error) // CheckTask checks legality of task configuration CheckTask(ctx context.Context, in *CheckTaskRequest, opts ...grpc.CallOption) (*CheckTaskResponse, error) - // Operate mysql-worker for server - OperateMysqlWorker(ctx context.Context, in *MysqlWorkerRequest, opts ...grpc.CallOption) (*MysqlWorkerResponse, error) + // Operate an upstream MySQL source. + OperateSource(ctx context.Context, in *OperateSourceRequest, opts ...grpc.CallOption) (*OperateSourceResponse, error) // RegisterWorker register the dm-workers. RegisterWorker(ctx context.Context, in *RegisterWorkerRequest, opts ...grpc.CallOption) (*RegisterWorkerResponse, error) // OfflineWorker offline the dm-workers. @@ -2466,9 +2608,9 @@ func (c *masterClient) CheckTask(ctx context.Context, in *CheckTaskRequest, opts return out, nil } -func (c *masterClient) OperateMysqlWorker(ctx context.Context, in *MysqlWorkerRequest, opts ...grpc.CallOption) (*MysqlWorkerResponse, error) { - out := new(MysqlWorkerResponse) - err := c.cc.Invoke(ctx, "/pb.Master/OperateMysqlWorker", in, out, opts...) +func (c *masterClient) OperateSource(ctx context.Context, in *OperateSourceRequest, opts ...grpc.CallOption) (*OperateSourceResponse, error) { + out := new(OperateSourceResponse) + err := c.cc.Invoke(ctx, "/pb.Master/OperateSource", in, out, opts...) if err != nil { return nil, err } @@ -2520,8 +2662,8 @@ type MasterServer interface { MigrateWorkerRelay(context.Context, *MigrateWorkerRelayRequest) (*CommonWorkerResponse, error) // CheckTask checks legality of task configuration CheckTask(context.Context, *CheckTaskRequest) (*CheckTaskResponse, error) - // Operate mysql-worker for server - OperateMysqlWorker(context.Context, *MysqlWorkerRequest) (*MysqlWorkerResponse, error) + // Operate an upstream MySQL source. + OperateSource(context.Context, *OperateSourceRequest) (*OperateSourceResponse, error) // RegisterWorker register the dm-workers. RegisterWorker(context.Context, *RegisterWorkerRequest) (*RegisterWorkerResponse, error) // OfflineWorker offline the dm-workers. @@ -2580,8 +2722,8 @@ func (*UnimplementedMasterServer) MigrateWorkerRelay(ctx context.Context, req *M func (*UnimplementedMasterServer) CheckTask(ctx context.Context, req *CheckTaskRequest) (*CheckTaskResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method CheckTask not implemented") } -func (*UnimplementedMasterServer) OperateMysqlWorker(ctx context.Context, req *MysqlWorkerRequest) (*MysqlWorkerResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method OperateMysqlWorker not implemented") +func (*UnimplementedMasterServer) OperateSource(ctx context.Context, req *OperateSourceRequest) (*OperateSourceResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method OperateSource not implemented") } func (*UnimplementedMasterServer) RegisterWorker(ctx context.Context, req *RegisterWorkerRequest) (*RegisterWorkerResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method RegisterWorker not implemented") @@ -2882,20 +3024,20 @@ func _Master_CheckTask_Handler(srv interface{}, ctx context.Context, dec func(in return interceptor(ctx, in, info, handler) } -func _Master_OperateMysqlWorker_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(MysqlWorkerRequest) +func _Master_OperateSource_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(OperateSourceRequest) if err := dec(in); err != nil { return nil, err } if interceptor == nil { - return srv.(MasterServer).OperateMysqlWorker(ctx, in) + return srv.(MasterServer).OperateSource(ctx, in) } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/pb.Master/OperateMysqlWorker", + FullMethod: "/pb.Master/OperateSource", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(MasterServer).OperateMysqlWorker(ctx, req.(*MysqlWorkerRequest)) + return srv.(MasterServer).OperateSource(ctx, req.(*OperateSourceRequest)) } return interceptor(ctx, in, info, handler) } @@ -3005,8 +3147,8 @@ var _Master_serviceDesc = grpc.ServiceDesc{ Handler: _Master_CheckTask_Handler, }, { - MethodName: "OperateMysqlWorker", - Handler: _Master_OperateMysqlWorker_Handler, + MethodName: "OperateSource", + Handler: _Master_OperateSource_Handler, }, { MethodName: "RegisterWorker", @@ -4549,6 +4691,81 @@ func (m *CheckTaskResponse) MarshalToSizedBuffer(dAtA []byte) (int, error) { return len(dAtA) - i, nil } +func (m *OperateSourceRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *OperateSourceRequest) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *OperateSourceRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if len(m.Config) > 0 { + i -= len(m.Config) + copy(dAtA[i:], m.Config) + i = encodeVarintDmmaster(dAtA, i, uint64(len(m.Config))) + i-- + dAtA[i] = 0x12 + } + if m.Op != 0 { + i = encodeVarintDmmaster(dAtA, i, uint64(m.Op)) + i-- + dAtA[i] = 0x8 + } + return len(dAtA) - i, nil +} + +func (m *OperateSourceResponse) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *OperateSourceResponse) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *OperateSourceResponse) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if len(m.Msg) > 0 { + i -= len(m.Msg) + copy(dAtA[i:], m.Msg) + i = encodeVarintDmmaster(dAtA, i, uint64(len(m.Msg))) + i-- + dAtA[i] = 0x12 + } + if m.Result { + i-- + if m.Result { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i-- + dAtA[i] = 0x8 + } + return len(dAtA) - i, nil +} + func (m *RegisterWorkerRequest) Marshal() (dAtA []byte, err error) { size := m.Size() dAtA = make([]byte, size) @@ -5395,6 +5612,38 @@ func (m *CheckTaskResponse) Size() (n int) { return n } +func (m *OperateSourceRequest) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Op != 0 { + n += 1 + sovDmmaster(uint64(m.Op)) + } + l = len(m.Config) + if l > 0 { + n += 1 + l + sovDmmaster(uint64(l)) + } + return n +} + +func (m *OperateSourceResponse) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Result { + n += 2 + } + l = len(m.Msg) + if l > 0 { + n += 1 + l + sovDmmaster(uint64(l)) + } + return n +} + func (m *RegisterWorkerRequest) Size() (n int) { if m == nil { return 0 @@ -9793,6 +10042,215 @@ func (m *CheckTaskResponse) Unmarshal(dAtA []byte) error { } return nil } +func (m *OperateSourceRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowDmmaster + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: OperateSourceRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: OperateSourceRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Op", wireType) + } + m.Op = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowDmmaster + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Op |= SourceOp(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Config", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowDmmaster + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthDmmaster + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthDmmaster + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Config = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipDmmaster(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthDmmaster + } + if (iNdEx + skippy) < 0 { + return ErrInvalidLengthDmmaster + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *OperateSourceResponse) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowDmmaster + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: OperateSourceResponse: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: OperateSourceResponse: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Result", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowDmmaster + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.Result = bool(v != 0) + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Msg", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowDmmaster + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthDmmaster + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthDmmaster + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Msg = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipDmmaster(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthDmmaster + } + if (iNdEx + skippy) < 0 { + return ErrInvalidLengthDmmaster + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} func (m *RegisterWorkerRequest) Unmarshal(dAtA []byte) error { l := len(dAtA) iNdEx := 0 diff --git a/dm/pb/dmworker.pb.go b/dm/pb/dmworker.pb.go index 5a503bfcc5..cde4f0bf88 100644 --- a/dm/pb/dmworker.pb.go +++ b/dm/pb/dmworker.pb.go @@ -254,37 +254,6 @@ func (RelayOp) EnumDescriptor() ([]byte, []int) { return fileDescriptor_51a1b9e17fd67b10, []int{5} } -type WorkerOp int32 - -const ( - WorkerOp_InvalidWorkerOp WorkerOp = 0 - WorkerOp_StartWorker WorkerOp = 1 - WorkerOp_UpdateConfig WorkerOp = 2 - WorkerOp_StopWorker WorkerOp = 3 -) - -var WorkerOp_name = map[int32]string{ - 0: "InvalidWorkerOp", - 1: "StartWorker", - 2: "UpdateConfig", - 3: "StopWorker", -} - -var WorkerOp_value = map[string]int32{ - "InvalidWorkerOp": 0, - "StartWorker": 1, - "UpdateConfig": 2, - "StopWorker": 3, -} - -func (x WorkerOp) String() string { - return proto.EnumName(WorkerOp_name, int32(x)) -} - -func (WorkerOp) EnumDescriptor() ([]byte, []int) { - return fileDescriptor_51a1b9e17fd67b10, []int{6} -} - type StartSubTaskRequest struct { Task string `protobuf:"bytes,1,opt,name=task,proto3" json:"task,omitempty"` } @@ -2716,110 +2685,6 @@ func (m *QueryWorkerConfigResponse) GetContent() string { return "" } -type MysqlWorkerRequest struct { - Op WorkerOp `protobuf:"varint,1,opt,name=op,proto3,enum=pb.WorkerOp" json:"op,omitempty"` - Config string `protobuf:"bytes,2,opt,name=config,proto3" json:"config,omitempty"` -} - -func (m *MysqlWorkerRequest) Reset() { *m = MysqlWorkerRequest{} } -func (m *MysqlWorkerRequest) String() string { return proto.CompactTextString(m) } -func (*MysqlWorkerRequest) ProtoMessage() {} -func (*MysqlWorkerRequest) Descriptor() ([]byte, []int) { - return fileDescriptor_51a1b9e17fd67b10, []int{37} -} -func (m *MysqlWorkerRequest) XXX_Unmarshal(b []byte) error { - return m.Unmarshal(b) -} -func (m *MysqlWorkerRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { - if deterministic { - return xxx_messageInfo_MysqlWorkerRequest.Marshal(b, m, deterministic) - } else { - b = b[:cap(b)] - n, err := m.MarshalToSizedBuffer(b) - if err != nil { - return nil, err - } - return b[:n], nil - } -} -func (m *MysqlWorkerRequest) XXX_Merge(src proto.Message) { - xxx_messageInfo_MysqlWorkerRequest.Merge(m, src) -} -func (m *MysqlWorkerRequest) XXX_Size() int { - return m.Size() -} -func (m *MysqlWorkerRequest) XXX_DiscardUnknown() { - xxx_messageInfo_MysqlWorkerRequest.DiscardUnknown(m) -} - -var xxx_messageInfo_MysqlWorkerRequest proto.InternalMessageInfo - -func (m *MysqlWorkerRequest) GetOp() WorkerOp { - if m != nil { - return m.Op - } - return WorkerOp_InvalidWorkerOp -} - -func (m *MysqlWorkerRequest) GetConfig() string { - if m != nil { - return m.Config - } - return "" -} - -type MysqlWorkerResponse struct { - Result bool `protobuf:"varint,1,opt,name=result,proto3" json:"result,omitempty"` - Msg string `protobuf:"bytes,2,opt,name=msg,proto3" json:"msg,omitempty"` -} - -func (m *MysqlWorkerResponse) Reset() { *m = MysqlWorkerResponse{} } -func (m *MysqlWorkerResponse) String() string { return proto.CompactTextString(m) } -func (*MysqlWorkerResponse) ProtoMessage() {} -func (*MysqlWorkerResponse) Descriptor() ([]byte, []int) { - return fileDescriptor_51a1b9e17fd67b10, []int{38} -} -func (m *MysqlWorkerResponse) XXX_Unmarshal(b []byte) error { - return m.Unmarshal(b) -} -func (m *MysqlWorkerResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { - if deterministic { - return xxx_messageInfo_MysqlWorkerResponse.Marshal(b, m, deterministic) - } else { - b = b[:cap(b)] - n, err := m.MarshalToSizedBuffer(b) - if err != nil { - return nil, err - } - return b[:n], nil - } -} -func (m *MysqlWorkerResponse) XXX_Merge(src proto.Message) { - xxx_messageInfo_MysqlWorkerResponse.Merge(m, src) -} -func (m *MysqlWorkerResponse) XXX_Size() int { - return m.Size() -} -func (m *MysqlWorkerResponse) XXX_DiscardUnknown() { - xxx_messageInfo_MysqlWorkerResponse.DiscardUnknown(m) -} - -var xxx_messageInfo_MysqlWorkerResponse proto.InternalMessageInfo - -func (m *MysqlWorkerResponse) GetResult() bool { - if m != nil { - return m.Result - } - return false -} - -func (m *MysqlWorkerResponse) GetMsg() string { - if m != nil { - return m.Msg - } - return "" -} - func init() { proto.RegisterEnum("pb.TaskOp", TaskOp_name, TaskOp_value) proto.RegisterEnum("pb.SQLOp", SQLOp_name, SQLOp_value) @@ -2827,7 +2692,6 @@ func init() { proto.RegisterEnum("pb.UnitType", UnitType_name, UnitType_value) proto.RegisterEnum("pb.ErrorType", ErrorType_name, ErrorType_value) proto.RegisterEnum("pb.RelayOp", RelayOp_name, RelayOp_value) - proto.RegisterEnum("pb.WorkerOp", WorkerOp_name, WorkerOp_value) proto.RegisterType((*StartSubTaskRequest)(nil), "pb.StartSubTaskRequest") proto.RegisterType((*UpdateRelayRequest)(nil), "pb.UpdateRelayRequest") proto.RegisterType((*MigrateRelayRequest)(nil), "pb.MigrateRelayRequest") @@ -2865,145 +2729,138 @@ func init() { proto.RegisterType((*PurgeRelayRequest)(nil), "pb.PurgeRelayRequest") proto.RegisterType((*QueryWorkerConfigRequest)(nil), "pb.QueryWorkerConfigRequest") proto.RegisterType((*QueryWorkerConfigResponse)(nil), "pb.QueryWorkerConfigResponse") - proto.RegisterType((*MysqlWorkerRequest)(nil), "pb.MysqlWorkerRequest") - proto.RegisterType((*MysqlWorkerResponse)(nil), "pb.MysqlWorkerResponse") } func init() { proto.RegisterFile("dmworker.proto", fileDescriptor_51a1b9e17fd67b10) } var fileDescriptor_51a1b9e17fd67b10 = []byte{ - // 2094 bytes of a gzipped FileDescriptorProto + // 2005 bytes of a gzipped FileDescriptorProto 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xb4, 0x58, 0x4f, 0x73, 0xe3, 0x58, - 0x11, 0xb7, 0x24, 0xdb, 0xb1, 0xdb, 0x4e, 0x46, 0x79, 0xc9, 0x66, 0x3d, 0x66, 0x36, 0x18, 0xcd, - 0xd6, 0x6e, 0xd6, 0x87, 0x14, 0x04, 0x28, 0xaa, 0xa0, 0x96, 0x65, 0xc7, 0xc9, 0xcc, 0x64, 0x71, - 0x66, 0x12, 0x39, 0x03, 0xdc, 0x28, 0xc5, 0x7e, 0x71, 0x54, 0x91, 0x25, 0x8d, 0xfe, 0x24, 0xe4, - 0x33, 0x50, 0x45, 0x71, 0xa5, 0x38, 0x52, 0x54, 0x71, 0xe0, 0x23, 0x70, 0xe3, 0x00, 0xc7, 0x3d, - 0x72, 0xa4, 0x66, 0xbe, 0x06, 0x07, 0xaa, 0xfb, 0x3d, 0x49, 0x4f, 0xf1, 0x9f, 0xd9, 0xc3, 0xec, - 0xc5, 0xe5, 0xfe, 0xf3, 0xfa, 0xf5, 0xfb, 0x75, 0xab, 0xfb, 0xbd, 0x86, 0x8d, 0xc9, 0xec, 0x36, - 0x88, 0xae, 0x79, 0xb4, 0x1f, 0x46, 0x41, 0x12, 0x30, 0x3d, 0xbc, 0xb0, 0x3e, 0x83, 0xad, 0x51, - 0xe2, 0x44, 0xc9, 0x28, 0xbd, 0x38, 0x77, 0xe2, 0x6b, 0x9b, 0xbf, 0x4e, 0x79, 0x9c, 0x30, 0x06, - 0xd5, 0xc4, 0x89, 0xaf, 0x3b, 0x5a, 0x4f, 0xdb, 0x6b, 0xda, 0xf4, 0xdf, 0xda, 0x07, 0xf6, 0x2a, - 0x9c, 0x38, 0x09, 0xb7, 0xb9, 0xe7, 0xdc, 0x65, 0x9a, 0x1d, 0x58, 0x1b, 0x07, 0x7e, 0xc2, 0xfd, - 0x44, 0x2a, 0x67, 0xa4, 0x35, 0x82, 0xad, 0x13, 0x77, 0x1a, 0xdd, 0x5f, 0xb0, 0x0b, 0xf0, 0xc4, - 0xf5, 0xbd, 0x60, 0xfa, 0xc2, 0x99, 0x71, 0xb9, 0x46, 0xe1, 0xb0, 0x47, 0xd0, 0x14, 0xd4, 0x69, - 0x10, 0x77, 0xf4, 0x9e, 0xb6, 0xb7, 0x6e, 0x17, 0x0c, 0xeb, 0x19, 0x7c, 0xf0, 0x32, 0xe4, 0x68, - 0xf4, 0x9e, 0xc7, 0x5d, 0xd0, 0x83, 0x90, 0xcc, 0x6d, 0x1c, 0xc0, 0x7e, 0x78, 0xb1, 0x8f, 0xc2, - 0x97, 0xa1, 0xad, 0x07, 0x21, 0x9e, 0xc6, 0xc7, 0xcd, 0x74, 0x71, 0x1a, 0xfc, 0x6f, 0xdd, 0xc0, - 0xce, 0x7d, 0x43, 0x71, 0x18, 0xf8, 0x31, 0x5f, 0x69, 0x69, 0x07, 0xea, 0x11, 0x8f, 0x53, 0x2f, - 0x21, 0x5b, 0x0d, 0x5b, 0x52, 0xc8, 0x8f, 0x83, 0x34, 0x1a, 0xf3, 0x8e, 0x41, 0x7b, 0x48, 0x8a, - 0x99, 0x60, 0xcc, 0xe2, 0x69, 0xa7, 0x4a, 0x4c, 0xfc, 0x6b, 0xf5, 0x61, 0x5b, 0xa0, 0xf8, 0x0d, - 0x10, 0xdf, 0x03, 0x76, 0x96, 0xf2, 0xe8, 0x6e, 0x94, 0x38, 0x49, 0x1a, 0x2b, 0x9a, 0x7e, 0x01, - 0x9d, 0x38, 0xcd, 0xa7, 0xb0, 0x49, 0x9a, 0x47, 0x51, 0x14, 0x44, 0xab, 0x14, 0xff, 0xac, 0x41, - 0xe7, 0xb9, 0xe3, 0x4f, 0xbc, 0x6c, 0xff, 0xd1, 0xd9, 0x70, 0x95, 0x65, 0xf6, 0x90, 0xd0, 0xd0, - 0x09, 0x8d, 0x26, 0xa2, 0x31, 0x3a, 0x1b, 0x16, 0xb0, 0x3a, 0xd1, 0x34, 0xee, 0x18, 0x3d, 0x03, - 0xd5, 0xf1, 0x3f, 0x46, 0xef, 0x22, 0x8f, 0x9e, 0x38, 0x76, 0xc1, 0xc0, 0xd8, 0xc7, 0xaf, 0xbd, - 0x53, 0x27, 0x49, 0x78, 0xe4, 0x77, 0x6a, 0x22, 0xf6, 0x05, 0xc7, 0xfa, 0x0d, 0x6c, 0x0f, 0x82, - 0xd9, 0x2c, 0xf0, 0x7f, 0x4d, 0x79, 0x9a, 0x87, 0xa4, 0x80, 0x5d, 0x5b, 0x02, 0xbb, 0xbe, 0x08, - 0x76, 0xa3, 0x80, 0xfd, 0x9f, 0x1a, 0x6c, 0x95, 0xb0, 0x7c, 0x5f, 0x96, 0xd9, 0x4f, 0x60, 0x3d, - 0x96, 0x50, 0x92, 0xe9, 0x4e, 0xb5, 0x67, 0xec, 0xb5, 0x0e, 0x36, 0x09, 0x2b, 0x55, 0x60, 0x97, - 0xf5, 0xd8, 0x0f, 0xa0, 0x15, 0xe1, 0x87, 0x21, 0x97, 0x21, 0x1a, 0xad, 0x83, 0x07, 0xb8, 0xcc, - 0x2e, 0xd8, 0xb6, 0xaa, 0x63, 0xfd, 0x43, 0x93, 0x19, 0x21, 0xe3, 0xfc, 0xde, 0x0e, 0xf1, 0x23, - 0x68, 0x4b, 0xe7, 0xc8, 0xb2, 0x3c, 0x83, 0xa9, 0x9c, 0x41, 0xec, 0x58, 0xd2, 0x62, 0xfb, 0x00, - 0xe4, 0xaa, 0x58, 0x23, 0x0e, 0xb0, 0x91, 0x1f, 0x40, 0xac, 0x50, 0x34, 0xac, 0xbf, 0x6a, 0xd0, - 0x1a, 0x5c, 0xf1, 0x71, 0x86, 0xc0, 0x0e, 0xd4, 0x43, 0x27, 0x8e, 0xf9, 0x24, 0xf3, 0x5b, 0x50, - 0x6c, 0x1b, 0x6a, 0x49, 0x90, 0x38, 0x1e, 0xb9, 0x5d, 0xb3, 0x05, 0x41, 0xc9, 0x93, 0x8e, 0xc7, - 0x3c, 0x8e, 0x2f, 0x53, 0x8f, 0x9c, 0xaf, 0xd9, 0x0a, 0x07, 0xad, 0x5d, 0x3a, 0xae, 0xc7, 0x27, - 0x94, 0x77, 0x35, 0x5b, 0x52, 0x58, 0xa1, 0x6e, 0x9d, 0xc8, 0x77, 0xfd, 0x29, 0xb9, 0x58, 0xb3, - 0x33, 0x12, 0x57, 0x4c, 0x78, 0xe2, 0xb8, 0x5e, 0xa7, 0xde, 0xd3, 0xf6, 0xda, 0xb6, 0xa4, 0xac, - 0x36, 0xc0, 0x61, 0x3a, 0x0b, 0x25, 0xe8, 0x7f, 0xd0, 0x00, 0x86, 0x81, 0x33, 0x91, 0x4e, 0x7f, - 0x0c, 0xeb, 0x97, 0xae, 0xef, 0xc6, 0x57, 0x7c, 0xf2, 0xe4, 0x2e, 0xe1, 0x31, 0xf9, 0x6e, 0xd8, - 0x65, 0x26, 0x3a, 0x4b, 0x5e, 0x0b, 0x15, 0x9d, 0x54, 0x14, 0x0e, 0xeb, 0x42, 0x23, 0x8c, 0x82, - 0x69, 0xc4, 0xe3, 0x58, 0xc6, 0x21, 0xa7, 0x71, 0xed, 0x8c, 0x27, 0x8e, 0x28, 0x7a, 0xf2, 0x23, - 0x52, 0x38, 0xd6, 0xef, 0x35, 0x58, 0x1f, 0x5d, 0x39, 0xd1, 0xc4, 0xf5, 0xa7, 0xcf, 0xa2, 0x20, - 0xa5, 0xb2, 0x94, 0x38, 0xd1, 0x94, 0x67, 0x35, 0x58, 0x52, 0xf8, 0x85, 0x1e, 0x1e, 0x0e, 0x71, - 0x7f, 0xfa, 0x42, 0xf1, 0x3f, 0xee, 0x7c, 0xe9, 0x46, 0x71, 0x82, 0x1f, 0xa8, 0xdc, 0x39, 0xa3, - 0x29, 0x61, 0xee, 0xfc, 0x31, 0x41, 0x68, 0x50, 0xc2, 0x10, 0x85, 0x6b, 0x52, 0x5f, 0x4a, 0x6a, - 0x24, 0xc9, 0x69, 0xeb, 0x2f, 0x06, 0xc0, 0xe8, 0xce, 0x1f, 0x4b, 0x78, 0x7a, 0xd0, 0xa2, 0x63, - 0x1e, 0xdd, 0x70, 0x3f, 0xc9, 0xc0, 0x51, 0x59, 0x68, 0x8c, 0xc8, 0xf3, 0x30, 0x03, 0x26, 0xa7, - 0xb1, 0x7c, 0x44, 0x7c, 0xcc, 0xfd, 0x04, 0x85, 0x06, 0x09, 0x0b, 0x06, 0xb3, 0xa0, 0x3d, 0x73, - 0xe2, 0x84, 0x47, 0x25, 0x68, 0x4a, 0x3c, 0xd6, 0x07, 0x53, 0xa5, 0x9f, 0x25, 0xee, 0x44, 0x16, - 0x9a, 0x39, 0x3e, 0xda, 0xa3, 0x43, 0x64, 0xf6, 0xea, 0xc2, 0x9e, 0xca, 0x43, 0x7b, 0x2a, 0x4d, - 0xf6, 0xd6, 0x84, 0xbd, 0xfb, 0x7c, 0xb4, 0x77, 0xe1, 0x05, 0xe3, 0x6b, 0xd7, 0x9f, 0x12, 0xec, - 0x0d, 0x82, 0xaa, 0xc4, 0x63, 0x9f, 0x83, 0x99, 0xfa, 0x11, 0x8f, 0x03, 0xef, 0x86, 0x4f, 0x28, - 0x7a, 0x71, 0xa7, 0xa9, 0x54, 0x0c, 0x35, 0xae, 0xf6, 0x9c, 0xaa, 0x12, 0x21, 0x10, 0x9f, 0x8c, - 0x8c, 0xd0, 0x2e, 0x80, 0x28, 0xb3, 0xe7, 0x77, 0x21, 0xef, 0xb4, 0x44, 0xce, 0x14, 0x1c, 0xeb, - 0x5f, 0x3a, 0xb4, 0x94, 0xb2, 0x32, 0x07, 0xa5, 0xf6, 0x0d, 0xa1, 0xd4, 0x97, 0x40, 0xd9, 0xcb, - 0x8a, 0x59, 0x7a, 0x71, 0xe8, 0x46, 0x32, 0xb1, 0x54, 0x56, 0xae, 0x51, 0x8a, 0x9d, 0xca, 0x62, - 0x7b, 0xf0, 0x40, 0x21, 0x95, 0xc8, 0xdd, 0x67, 0xb3, 0x7d, 0x60, 0xc4, 0x1a, 0x38, 0xc9, 0xf8, - 0xea, 0x55, 0x78, 0x42, 0xde, 0x50, 0xf8, 0x1a, 0xf6, 0x02, 0x09, 0xfb, 0x2e, 0xd4, 0xe2, 0xc4, - 0x99, 0x72, 0x8a, 0x5c, 0xd6, 0xc7, 0x90, 0x61, 0x0b, 0x3e, 0xfb, 0x2c, 0xaf, 0xa0, 0x0d, 0xaa, - 0x62, 0x14, 0x8b, 0xd3, 0x28, 0xc0, 0xda, 0x62, 0x93, 0x20, 0x2b, 0xaa, 0xd6, 0xff, 0x74, 0x58, - 0x2f, 0xd5, 0xf5, 0x85, 0x6d, 0x33, 0xdf, 0x51, 0x5f, 0xb2, 0x63, 0x0f, 0xaa, 0xa9, 0xef, 0x26, - 0x84, 0xd4, 0xc6, 0x41, 0x1b, 0xe5, 0xaf, 0x7c, 0x37, 0xc1, 0x60, 0xd9, 0x24, 0x51, 0x7c, 0xaa, - 0xbe, 0xc3, 0x27, 0xf6, 0x7d, 0xd8, 0x2a, 0x32, 0xe5, 0xf0, 0x70, 0x38, 0x0c, 0xc6, 0xd7, 0xc7, - 0x87, 0x12, 0xbd, 0x45, 0x22, 0xc6, 0x44, 0x0b, 0xa0, 0x8c, 0x7f, 0x5e, 0x11, 0x4d, 0xe0, 0x53, - 0xa8, 0x8d, 0xb1, 0x3a, 0x13, 0x4a, 0xb2, 0x15, 0x29, 0xe5, 0xfa, 0x79, 0xc5, 0x16, 0x72, 0xf6, - 0x31, 0x54, 0x27, 0xe9, 0x2c, 0x94, 0x58, 0x51, 0xc5, 0x2f, 0xea, 0xe5, 0xf3, 0x8a, 0x4d, 0x52, - 0xd4, 0xf2, 0x02, 0x67, 0xd2, 0x69, 0x16, 0x5a, 0x45, 0x19, 0x45, 0x2d, 0x94, 0xa2, 0x16, 0xa6, - 0x30, 0xa5, 0xb3, 0xd4, 0x2a, 0xaa, 0x09, 0x6a, 0xa1, 0xf4, 0x49, 0x03, 0xea, 0xb1, 0xa8, 0xc6, - 0x3f, 0x87, 0xcd, 0x12, 0xfa, 0x43, 0x37, 0x26, 0xa8, 0x84, 0xb8, 0xa3, 0x2d, 0x6b, 0xbe, 0xd9, - 0xfa, 0x5d, 0x00, 0x3a, 0x93, 0xe8, 0x60, 0xb2, 0x13, 0x6a, 0xc5, 0x45, 0xe1, 0x23, 0x68, 0xe2, - 0x59, 0x56, 0x88, 0xf1, 0x10, 0xcb, 0xc4, 0x21, 0xb4, 0xc9, 0xfb, 0xb3, 0xe1, 0x12, 0x0d, 0x76, - 0x00, 0xdb, 0xa2, 0x2f, 0xe5, 0x77, 0x5a, 0x37, 0x71, 0x03, 0x5f, 0x7e, 0x58, 0x0b, 0x65, 0x58, - 0x31, 0x39, 0x9a, 0x1b, 0x9d, 0x0d, 0xb3, 0x92, 0x9d, 0xd1, 0xd6, 0x8f, 0xa1, 0x89, 0x3b, 0x8a, - 0xed, 0xf6, 0xa0, 0x4e, 0x82, 0x0c, 0x07, 0x33, 0x87, 0x53, 0x3a, 0x64, 0x4b, 0x39, 0xc2, 0x50, - 0x34, 0xe6, 0x05, 0x07, 0xf9, 0x93, 0x0e, 0x6d, 0xb5, 0xf3, 0x7f, 0x5b, 0x49, 0xce, 0x94, 0x0b, - 0x72, 0x96, 0x87, 0x9f, 0x64, 0x79, 0xa8, 0xdc, 0x28, 0x8a, 0x98, 0x15, 0x69, 0xf8, 0x58, 0xa6, - 0x61, 0x9d, 0xd4, 0xd6, 0xb3, 0x34, 0xcc, 0xb4, 0x44, 0x16, 0x3e, 0x96, 0x59, 0xb8, 0x56, 0x28, - 0xe5, 0x01, 0xcc, 0x93, 0xf0, 0xb1, 0x4c, 0xc2, 0x46, 0xa1, 0x94, 0x83, 0x9a, 0xe7, 0xe0, 0x1a, - 0xd4, 0x08, 0x3c, 0xeb, 0xa7, 0x60, 0xaa, 0xd0, 0x50, 0x06, 0x7e, 0x22, 0x85, 0x25, 0xe0, 0xd5, - 0x9b, 0x93, 0x5c, 0xfb, 0x1a, 0xd6, 0x4b, 0x9f, 0x30, 0x16, 0x6e, 0x37, 0x1e, 0x38, 0xfe, 0x98, - 0x7b, 0xf9, 0x3d, 0x48, 0xe1, 0x28, 0x21, 0xd5, 0x0b, 0xcb, 0xd2, 0x44, 0x29, 0xa4, 0xca, 0x6d, - 0xc6, 0x28, 0xdd, 0x66, 0xfe, 0xae, 0x41, 0xfd, 0x5c, 0x04, 0xb1, 0x03, 0x6b, 0x47, 0x51, 0x34, - 0x08, 0x26, 0x22, 0x8e, 0x35, 0x3b, 0x23, 0x31, 0xc5, 0xf0, 0xaf, 0xe7, 0xc4, 0xb1, 0xbc, 0x75, - 0xe5, 0xb4, 0x94, 0x8d, 0xc6, 0x41, 0xc8, 0xe5, 0xb5, 0x2b, 0xa7, 0xa5, 0x6c, 0xc8, 0x6f, 0xb8, - 0x27, 0xaf, 0x5d, 0x39, 0x8d, 0xbb, 0x9d, 0xf0, 0x38, 0xc6, 0x04, 0x11, 0x95, 0x28, 0x23, 0x71, - 0x95, 0xed, 0xdc, 0x0e, 0x9c, 0x34, 0xe6, 0xb2, 0xe9, 0xe6, 0xb4, 0xc5, 0xa1, 0xad, 0x1e, 0x8f, - 0x7d, 0x0f, 0xaa, 0xd4, 0xd3, 0xc4, 0x83, 0x8c, 0x62, 0x43, 0x02, 0x91, 0x44, 0xf8, 0x9b, 0xa5, - 0xaf, 0x5e, 0x7c, 0x65, 0xbd, 0x2c, 0x1c, 0x06, 0x45, 0x54, 0x3c, 0xe3, 0x4a, 0x81, 0xe8, 0x42, - 0x67, 0x74, 0xeb, 0x26, 0xe3, 0x2b, 0xfa, 0x0c, 0x44, 0x9f, 0x90, 0xef, 0x20, 0xeb, 0x00, 0xb6, - 0xe4, 0xdb, 0xb0, 0xf4, 0x72, 0xfd, 0x8e, 0xf2, 0x30, 0x6c, 0xe5, 0xd7, 0x5c, 0xf1, 0x18, 0xb2, - 0x52, 0xd8, 0x2e, 0xaf, 0x91, 0x77, 0xf3, 0x55, 0x8b, 0xde, 0xc3, 0x73, 0xf2, 0x16, 0x36, 0x4f, - 0xd3, 0x68, 0x5a, 0x76, 0xb4, 0x0b, 0x0d, 0xd7, 0x77, 0xc6, 0x89, 0x7b, 0xc3, 0x65, 0x46, 0xe5, - 0x34, 0xbd, 0x33, 0x5d, 0xf9, 0x16, 0x36, 0x6c, 0xfa, 0x2f, 0xae, 0x84, 0x1e, 0xa7, 0xef, 0x3b, - 0xbf, 0x12, 0x0a, 0x9a, 0x5c, 0x11, 0x3d, 0xbd, 0x2a, 0x5d, 0x21, 0x0a, 0xf1, 0xa3, 0x97, 0x88, - 0x78, 0xa9, 0x0d, 0x02, 0xff, 0xd2, 0x9d, 0x66, 0xf8, 0xdd, 0xc2, 0xc3, 0x05, 0xb2, 0xf7, 0xf6, - 0x58, 0x51, 0x46, 0x0e, 0xd5, 0xf2, 0xc8, 0xe1, 0x2b, 0x60, 0x27, 0x77, 0xf1, 0x6b, 0x2f, 0x7b, - 0x3e, 0x0a, 0x38, 0x1e, 0x29, 0x21, 0xa0, 0x1a, 0x24, 0xc4, 0x45, 0x0c, 0xc6, 0xe4, 0x61, 0xb6, - 0xaf, 0xa0, 0xac, 0x2f, 0x60, 0xab, 0x64, 0xeb, 0x1d, 0xee, 0xcf, 0xe5, 0x60, 0xff, 0xb7, 0x50, - 0x17, 0x93, 0x03, 0xb6, 0x0e, 0xcd, 0x63, 0xff, 0xc6, 0xf1, 0xdc, 0xc9, 0xcb, 0xd0, 0xac, 0xb0, - 0x06, 0x54, 0x47, 0x49, 0x10, 0x9a, 0x1a, 0x6b, 0x42, 0xed, 0x14, 0x93, 0xde, 0xd4, 0x19, 0x40, - 0x1d, 0x2b, 0xc2, 0x8c, 0x9b, 0x06, 0xb2, 0x69, 0x28, 0x63, 0x56, 0x91, 0x2d, 0xc6, 0x05, 0x66, - 0x8d, 0x6d, 0x00, 0x7c, 0x99, 0x26, 0x81, 0x54, 0xab, 0xf7, 0xfb, 0x50, 0xa3, 0xc7, 0x38, 0x19, - 0xfc, 0xe5, 0xf1, 0xa9, 0x59, 0x61, 0x2d, 0x58, 0xb3, 0x8f, 0x4e, 0x87, 0x5f, 0x0e, 0x8e, 0x4c, - 0x0d, 0xd7, 0x1e, 0xbf, 0xf8, 0xea, 0x68, 0x70, 0x6e, 0xea, 0xfd, 0x5f, 0x91, 0xc9, 0x29, 0xc2, - 0xd9, 0x96, 0xbe, 0x10, 0x6d, 0x56, 0xd8, 0x1a, 0x18, 0x2f, 0xf8, 0xad, 0xa9, 0xd1, 0xe2, 0xd4, - 0xc7, 0x97, 0x91, 0xf0, 0x87, 0x5c, 0x9b, 0x98, 0x06, 0x0a, 0xd0, 0xe1, 0x90, 0x4f, 0xcc, 0x2a, - 0x6b, 0x43, 0xe3, 0xa9, 0x7c, 0xea, 0x98, 0xb5, 0xfe, 0x4b, 0x68, 0x64, 0x15, 0x9d, 0x3d, 0x80, - 0x96, 0x34, 0x8d, 0x2c, 0xb3, 0x82, 0xe7, 0xa0, 0xba, 0x6d, 0x6a, 0xe8, 0x22, 0xd6, 0x66, 0x53, - 0xc7, 0x7f, 0x58, 0x80, 0x4d, 0x83, 0xdc, 0xbe, 0xf3, 0xc7, 0x66, 0x15, 0x15, 0x29, 0x81, 0xcd, - 0x49, 0xff, 0x67, 0xd0, 0xcc, 0x3f, 0x6f, 0x74, 0xf6, 0x95, 0x7f, 0xed, 0x07, 0xb7, 0x3e, 0xf1, - 0xc4, 0x01, 0x8f, 0x7e, 0xc7, 0xb1, 0x9f, 0x99, 0x1a, 0x6e, 0x48, 0xf6, 0x9f, 0x52, 0xd3, 0x34, - 0xf5, 0xfe, 0x09, 0xac, 0xc9, 0xcf, 0x8b, 0x31, 0xd8, 0x90, 0xce, 0x48, 0x8e, 0x59, 0xc1, 0x38, - 0xe0, 0x39, 0xc4, 0x56, 0x1a, 0xe2, 0x49, 0x47, 0x14, 0xb4, 0x8e, 0xe6, 0x04, 0xb6, 0x82, 0x61, - 0xf4, 0x6d, 0x68, 0x64, 0xa9, 0xc2, 0xb6, 0xe0, 0x81, 0xb4, 0x97, 0xb1, 0xcc, 0x0a, 0xae, 0xa0, - 0x40, 0x09, 0x96, 0xa9, 0x91, 0xc3, 0x14, 0x2e, 0x91, 0xf4, 0xa6, 0x8e, 0x9b, 0xe0, 0x9e, 0x52, - 0xc3, 0x38, 0xf8, 0xdb, 0x1a, 0xd4, 0x05, 0xc1, 0x06, 0xd0, 0x56, 0x67, 0x6f, 0xec, 0x43, 0xd9, - 0x3f, 0xef, 0x4f, 0xe3, 0xba, 0x1d, 0xea, 0x80, 0x0b, 0x06, 0x23, 0x56, 0x85, 0x1d, 0xc3, 0x46, - 0x79, 0x8e, 0xc5, 0x1e, 0xa2, 0xf6, 0xc2, 0x21, 0x59, 0xb7, 0xbb, 0x48, 0x94, 0x9b, 0x3a, 0x82, - 0xf5, 0xd2, 0x68, 0x8a, 0xd1, 0xbe, 0x8b, 0xa6, 0x55, 0x2b, 0x3d, 0xfa, 0x05, 0xb4, 0x94, 0x49, - 0x0b, 0xdb, 0x41, 0xd5, 0xf9, 0x31, 0x56, 0xf7, 0xc3, 0x39, 0x7e, 0x6e, 0xe1, 0x73, 0x80, 0x62, - 0xca, 0xc1, 0x3e, 0xc8, 0x15, 0xd5, 0xe9, 0x56, 0x77, 0xe7, 0x3e, 0x3b, 0x5f, 0xfe, 0x14, 0x40, - 0x8e, 0xb8, 0xce, 0x86, 0x31, 0x7b, 0x84, 0x7a, 0xcb, 0x46, 0x5e, 0x2b, 0x0f, 0x72, 0x02, 0x9b, - 0x73, 0x2d, 0x42, 0x98, 0x5b, 0xd6, 0x39, 0x56, 0x9a, 0x1b, 0x40, 0x5b, 0xed, 0x10, 0x22, 0xdc, - 0x0b, 0xfa, 0x8c, 0x30, 0xb2, 0xa8, 0x99, 0x58, 0x15, 0xf6, 0x05, 0x40, 0x51, 0xef, 0x05, 0x34, - 0x73, 0xf5, 0x7f, 0xa5, 0x17, 0xcf, 0x60, 0x53, 0x99, 0xe2, 0x8a, 0x34, 0x15, 0x31, 0x9a, 0x1f, - 0xee, 0xae, 0x34, 0x64, 0xcb, 0x91, 0xa3, 0x5a, 0xe4, 0x05, 0x3a, 0xcb, 0xfa, 0x42, 0xf7, 0xa3, - 0x25, 0x52, 0x15, 0x22, 0x75, 0x64, 0x2c, 0x20, 0x5a, 0x30, 0x44, 0x7e, 0xc7, 0x09, 0x99, 0x04, - 0x4f, 0xa9, 0xdf, 0xe2, 0x88, 0xf3, 0xcd, 0x41, 0xa4, 0xe1, 0x82, 0x42, 0x6f, 0x55, 0x9e, 0x74, - 0xfe, 0xfd, 0x66, 0x57, 0xfb, 0xfa, 0xcd, 0xae, 0xf6, 0xdf, 0x37, 0xbb, 0xda, 0x1f, 0xdf, 0xee, - 0x56, 0xbe, 0x7e, 0xbb, 0x5b, 0xf9, 0xcf, 0xdb, 0xdd, 0xca, 0x45, 0x9d, 0x06, 0xe8, 0x3f, 0xfc, - 0x7f, 0x00, 0x00, 0x00, 0xff, 0xff, 0x79, 0x63, 0x20, 0x05, 0x52, 0x17, 0x00, 0x00, + 0x11, 0xb7, 0x24, 0xdb, 0xb1, 0xdb, 0x4e, 0x46, 0x79, 0xc9, 0x66, 0x35, 0x66, 0x37, 0x18, 0xcd, + 0xd6, 0x6e, 0xd6, 0x87, 0x14, 0x04, 0x28, 0xaa, 0xa0, 0x16, 0xd8, 0x71, 0x32, 0x33, 0x01, 0x67, + 0x26, 0x91, 0x33, 0xc0, 0x8d, 0x52, 0xac, 0x17, 0x47, 0x15, 0x5b, 0xd2, 0xe8, 0x4f, 0x42, 0x3e, + 0x03, 0x55, 0x14, 0x57, 0x8a, 0x23, 0xc5, 0x8d, 0x8f, 0xc0, 0x8d, 0x03, 0x1c, 0xf7, 0xc8, 0x0d, + 0x6a, 0xe6, 0x6b, 0x70, 0xa0, 0xba, 0xdf, 0x93, 0xf4, 0x94, 0xd8, 0x66, 0x0f, 0xc3, 0xc5, 0xe5, + 0xfe, 0xf3, 0xfa, 0x75, 0xff, 0xba, 0xd5, 0xef, 0xbd, 0x86, 0x0d, 0x6f, 0x7e, 0x1b, 0xc6, 0xd7, + 0x3c, 0xde, 0x8f, 0xe2, 0x30, 0x0d, 0x99, 0x1e, 0x5d, 0xd8, 0x9f, 0xc3, 0xd6, 0x38, 0x75, 0xe3, + 0x74, 0x9c, 0x5d, 0x9c, 0xbb, 0xc9, 0xb5, 0xc3, 0xdf, 0x64, 0x3c, 0x49, 0x19, 0x83, 0x7a, 0xea, + 0x26, 0xd7, 0x96, 0xd6, 0xd7, 0xf6, 0xda, 0x0e, 0xfd, 0xb7, 0xf7, 0x81, 0xbd, 0x8e, 0x3c, 0x37, + 0xe5, 0x0e, 0x9f, 0xb9, 0x77, 0xb9, 0xa6, 0x05, 0x6b, 0x93, 0x30, 0x48, 0x79, 0x90, 0x4a, 0xe5, + 0x9c, 0xb4, 0xc7, 0xb0, 0x75, 0xe2, 0x4f, 0xe3, 0xfb, 0x0b, 0x76, 0x01, 0x9e, 0xfa, 0xc1, 0x2c, + 0x9c, 0xbe, 0x74, 0xe7, 0x5c, 0xae, 0x51, 0x38, 0xec, 0x23, 0x68, 0x0b, 0xea, 0x34, 0x4c, 0x2c, + 0xbd, 0xaf, 0xed, 0xad, 0x3b, 0x25, 0xc3, 0x7e, 0x0e, 0x1f, 0xbc, 0x8a, 0x38, 0x1a, 0xbd, 0xe7, + 0x71, 0x0f, 0xf4, 0x30, 0x22, 0x73, 0x1b, 0x07, 0xb0, 0x1f, 0x5d, 0xec, 0xa3, 0xf0, 0x55, 0xe4, + 0xe8, 0x61, 0x84, 0xd1, 0x04, 0xb8, 0x99, 0x2e, 0xa2, 0xc1, 0xff, 0xf6, 0x0d, 0xec, 0xdc, 0x37, + 0x94, 0x44, 0x61, 0x90, 0xf0, 0x95, 0x96, 0x76, 0xa0, 0x19, 0xf3, 0x24, 0x9b, 0xa5, 0x64, 0xab, + 0xe5, 0x48, 0x0a, 0xf9, 0x49, 0x98, 0xc5, 0x13, 0x6e, 0x19, 0xb4, 0x87, 0xa4, 0x98, 0x09, 0xc6, + 0x3c, 0x99, 0x5a, 0x75, 0x62, 0xe2, 0x5f, 0x7b, 0x00, 0xdb, 0x02, 0xc5, 0xaf, 0x81, 0xf8, 0x1e, + 0xb0, 0xb3, 0x8c, 0xc7, 0x77, 0xe3, 0xd4, 0x4d, 0xb3, 0x44, 0xd1, 0x0c, 0x4a, 0xe8, 0x44, 0x34, + 0x9f, 0xc1, 0x26, 0x69, 0x1e, 0xc5, 0x71, 0x18, 0xaf, 0x52, 0xfc, 0xa3, 0x06, 0xd6, 0x0b, 0x37, + 0xf0, 0x66, 0xf9, 0xfe, 0xe3, 0xb3, 0xd1, 0x2a, 0xcb, 0xec, 0x31, 0xa1, 0xa1, 0x13, 0x1a, 0x6d, + 0x44, 0x63, 0x7c, 0x36, 0x2a, 0x61, 0x75, 0xe3, 0x69, 0x62, 0x19, 0x7d, 0x03, 0xd5, 0xf1, 0x3f, + 0x66, 0xef, 0xa2, 0xc8, 0x9e, 0x08, 0xbb, 0x64, 0x60, 0xee, 0x93, 0x37, 0xb3, 0x53, 0x37, 0x4d, + 0x79, 0x1c, 0x58, 0x0d, 0x91, 0xfb, 0x92, 0x63, 0xff, 0x0a, 0xb6, 0x87, 0xe1, 0x7c, 0x1e, 0x06, + 0xbf, 0xa4, 0x3a, 0x2d, 0x52, 0x52, 0xc2, 0xae, 0x2d, 0x81, 0x5d, 0x5f, 0x04, 0xbb, 0x51, 0xc2, + 0xfe, 0x37, 0x0d, 0xb6, 0x2a, 0x58, 0xbe, 0x2f, 0xcb, 0xec, 0x07, 0xb0, 0x9e, 0x48, 0x28, 0xc9, + 0xb4, 0x55, 0xef, 0x1b, 0x7b, 0x9d, 0x83, 0x4d, 0xc2, 0x4a, 0x15, 0x38, 0x55, 0x3d, 0xf6, 0x1d, + 0xe8, 0xc4, 0xf8, 0x61, 0xc8, 0x65, 0x88, 0x46, 0xe7, 0xe0, 0x11, 0x2e, 0x73, 0x4a, 0xb6, 0xa3, + 0xea, 0xd8, 0x7f, 0xd5, 0x64, 0x45, 0xc8, 0x3c, 0xbf, 0xb7, 0x20, 0xbe, 0x07, 0x5d, 0xe9, 0x1c, + 0x59, 0x96, 0x31, 0x98, 0x4a, 0x0c, 0x62, 0xc7, 0x8a, 0x16, 0xdb, 0x07, 0x20, 0x57, 0xc5, 0x1a, + 0x11, 0xc0, 0x46, 0x11, 0x80, 0x58, 0xa1, 0x68, 0xd8, 0x7f, 0xd6, 0xa0, 0x33, 0xbc, 0xe2, 0x93, + 0x1c, 0x81, 0x1d, 0x68, 0x46, 0x6e, 0x92, 0x70, 0x2f, 0xf7, 0x5b, 0x50, 0x6c, 0x1b, 0x1a, 0x69, + 0x98, 0xba, 0x33, 0x72, 0xbb, 0xe1, 0x08, 0x82, 0x8a, 0x27, 0x9b, 0x4c, 0x78, 0x92, 0x5c, 0x66, + 0x33, 0x72, 0xbe, 0xe1, 0x28, 0x1c, 0xb4, 0x76, 0xe9, 0xfa, 0x33, 0xee, 0x51, 0xdd, 0x35, 0x1c, + 0x49, 0x61, 0x87, 0xba, 0x75, 0xe3, 0xc0, 0x0f, 0xa6, 0xe4, 0x62, 0xc3, 0xc9, 0x49, 0x5c, 0xe1, + 0xf1, 0xd4, 0xf5, 0x67, 0x56, 0xb3, 0xaf, 0xed, 0x75, 0x1d, 0x49, 0xd9, 0x5d, 0x80, 0xc3, 0x6c, + 0x1e, 0x49, 0xd0, 0x7f, 0xa7, 0x01, 0x8c, 0x42, 0xd7, 0x93, 0x4e, 0x7f, 0x02, 0xeb, 0x97, 0x7e, + 0xe0, 0x27, 0x57, 0xdc, 0x7b, 0x7a, 0x97, 0xf2, 0x84, 0x7c, 0x37, 0x9c, 0x2a, 0x13, 0x9d, 0x25, + 0xaf, 0x85, 0x8a, 0x4e, 0x2a, 0x0a, 0x87, 0xf5, 0xa0, 0x15, 0xc5, 0xe1, 0x34, 0xe6, 0x49, 0x22, + 0xf3, 0x50, 0xd0, 0xb8, 0x76, 0xce, 0x53, 0x57, 0x34, 0x3d, 0xf9, 0x11, 0x29, 0x1c, 0xfb, 0xb7, + 0x1a, 0xac, 0x8f, 0xaf, 0xdc, 0xd8, 0xf3, 0x83, 0xe9, 0xf3, 0x38, 0xcc, 0xa8, 0x2d, 0xa5, 0x6e, + 0x3c, 0xe5, 0x79, 0x0f, 0x96, 0x14, 0x7e, 0xa1, 0x87, 0x87, 0x23, 0xdc, 0x9f, 0xbe, 0x50, 0xfc, + 0x8f, 0x3b, 0x5f, 0xfa, 0x71, 0x92, 0xe2, 0x07, 0x2a, 0x77, 0xce, 0x69, 0x2a, 0x98, 0xbb, 0x60, + 0x42, 0x10, 0x1a, 0x54, 0x30, 0x44, 0xe1, 0x9a, 0x2c, 0x90, 0x92, 0x06, 0x49, 0x0a, 0xda, 0xfe, + 0x93, 0x01, 0x30, 0xbe, 0x0b, 0x26, 0x12, 0x9e, 0x3e, 0x74, 0x28, 0xcc, 0xa3, 0x1b, 0x1e, 0xa4, + 0x39, 0x38, 0x2a, 0x0b, 0x8d, 0x11, 0x79, 0x1e, 0xe5, 0xc0, 0x14, 0x34, 0xb6, 0x8f, 0x98, 0x4f, + 0x78, 0x90, 0xa2, 0xd0, 0x20, 0x61, 0xc9, 0x60, 0x36, 0x74, 0xe7, 0x6e, 0x92, 0xf2, 0xb8, 0x02, + 0x4d, 0x85, 0xc7, 0x06, 0x60, 0xaa, 0xf4, 0xf3, 0xd4, 0xf7, 0x64, 0xa3, 0x79, 0xc0, 0x47, 0x7b, + 0x14, 0x44, 0x6e, 0xaf, 0x29, 0xec, 0xa9, 0x3c, 0xb4, 0xa7, 0xd2, 0x64, 0x6f, 0x4d, 0xd8, 0xbb, + 0xcf, 0x47, 0x7b, 0x17, 0xb3, 0x70, 0x72, 0xed, 0x07, 0x53, 0x82, 0xbd, 0x45, 0x50, 0x55, 0x78, + 0xec, 0x0b, 0x30, 0xb3, 0x20, 0xe6, 0x49, 0x38, 0xbb, 0xe1, 0x1e, 0x65, 0x2f, 0xb1, 0xda, 0x4a, + 0xc7, 0x50, 0xf3, 0xea, 0x3c, 0x50, 0x55, 0x32, 0x04, 0xe2, 0x93, 0x91, 0x19, 0xda, 0x05, 0x10, + 0x6d, 0xf6, 0xfc, 0x2e, 0xe2, 0x56, 0x47, 0xd4, 0x4c, 0xc9, 0xb1, 0xff, 0xae, 0x43, 0x47, 0x69, + 0x2b, 0x0f, 0xa0, 0xd4, 0xbe, 0x26, 0x94, 0xfa, 0x12, 0x28, 0xfb, 0x79, 0x33, 0xcb, 0x2e, 0x0e, + 0xfd, 0x58, 0x16, 0x96, 0xca, 0x2a, 0x34, 0x2a, 0xb9, 0x53, 0x59, 0x6c, 0x0f, 0x1e, 0x29, 0xa4, + 0x92, 0xb9, 0xfb, 0x6c, 0xb6, 0x0f, 0x8c, 0x58, 0x43, 0x37, 0x9d, 0x5c, 0xbd, 0x8e, 0x4e, 0xc8, + 0x1b, 0x4a, 0x5f, 0xcb, 0x59, 0x20, 0x61, 0xdf, 0x84, 0x46, 0x92, 0xba, 0x53, 0x4e, 0x99, 0xcb, + 0xcf, 0x31, 0x64, 0x38, 0x82, 0xcf, 0x3e, 0x2f, 0x3a, 0x68, 0x8b, 0xba, 0x18, 0xe5, 0xe2, 0x34, + 0x0e, 0xb1, 0xb7, 0x38, 0x24, 0xc8, 0x9b, 0xaa, 0xfd, 0x1f, 0x1d, 0xd6, 0x2b, 0x7d, 0x7d, 0xe1, + 0xb1, 0x59, 0xec, 0xa8, 0x2f, 0xd9, 0xb1, 0x0f, 0xf5, 0x2c, 0xf0, 0x53, 0x42, 0x6a, 0xe3, 0xa0, + 0x8b, 0xf2, 0xd7, 0x81, 0x9f, 0x62, 0xb2, 0x1c, 0x92, 0x28, 0x3e, 0xd5, 0xff, 0x87, 0x4f, 0xec, + 0xdb, 0xb0, 0x55, 0x56, 0xca, 0xe1, 0xe1, 0x68, 0x14, 0x4e, 0xae, 0x8f, 0x0f, 0x25, 0x7a, 0x8b, + 0x44, 0x8c, 0x89, 0x23, 0x80, 0x2a, 0xfe, 0x45, 0x4d, 0x1c, 0x02, 0x9f, 0x41, 0x63, 0x82, 0xdd, + 0x99, 0x50, 0x92, 0x47, 0x91, 0xd2, 0xae, 0x5f, 0xd4, 0x1c, 0x21, 0x67, 0x9f, 0x40, 0xdd, 0xcb, + 0xe6, 0x91, 0xc4, 0x8a, 0x3a, 0x7e, 0xd9, 0x2f, 0x5f, 0xd4, 0x1c, 0x92, 0xa2, 0xd6, 0x2c, 0x74, + 0x3d, 0xab, 0x5d, 0x6a, 0x95, 0x6d, 0x14, 0xb5, 0x50, 0x8a, 0x5a, 0x58, 0xc2, 0x54, 0xce, 0x52, + 0xab, 0xec, 0x26, 0xa8, 0x85, 0xd2, 0xa7, 0x2d, 0x68, 0x26, 0xa2, 0x1b, 0xff, 0x18, 0x36, 0x2b, + 0xe8, 0x8f, 0xfc, 0x84, 0xa0, 0x12, 0x62, 0x4b, 0x5b, 0x76, 0xf8, 0xe6, 0xeb, 0x77, 0x01, 0x28, + 0x26, 0x71, 0x82, 0xc9, 0x93, 0x50, 0x2b, 0x2f, 0x0a, 0x1f, 0x43, 0x1b, 0x63, 0x59, 0x21, 0xc6, + 0x20, 0x96, 0x89, 0x23, 0xe8, 0x92, 0xf7, 0x67, 0xa3, 0x25, 0x1a, 0xec, 0x00, 0xb6, 0xc5, 0xb9, + 0x54, 0xdc, 0x69, 0xfd, 0xd4, 0x0f, 0x03, 0xf9, 0x61, 0x2d, 0x94, 0x61, 0xc7, 0xe4, 0x68, 0x6e, + 0x7c, 0x36, 0xca, 0x5b, 0x76, 0x4e, 0xdb, 0xdf, 0x87, 0x36, 0xee, 0x28, 0xb6, 0xdb, 0x83, 0x26, + 0x09, 0x72, 0x1c, 0xcc, 0x02, 0x4e, 0xe9, 0x90, 0x23, 0xe5, 0x08, 0x43, 0x79, 0x30, 0x2f, 0x08, + 0xe4, 0x0f, 0x3a, 0x74, 0xd5, 0x93, 0xff, 0xff, 0x55, 0xe4, 0x4c, 0xb9, 0x20, 0xe7, 0x75, 0xf8, + 0x69, 0x5e, 0x87, 0xca, 0x8d, 0xa2, 0xcc, 0x59, 0x59, 0x86, 0x4f, 0x64, 0x19, 0x36, 0x49, 0x6d, + 0x3d, 0x2f, 0xc3, 0x5c, 0x4b, 0x54, 0xe1, 0x13, 0x59, 0x85, 0x6b, 0xa5, 0x52, 0x91, 0xc0, 0xa2, + 0x08, 0x9f, 0xc8, 0x22, 0x6c, 0x95, 0x4a, 0x05, 0xa8, 0x45, 0x0d, 0xae, 0x41, 0x83, 0xc0, 0xb3, + 0x7f, 0x08, 0xa6, 0x0a, 0x0d, 0x55, 0xe0, 0xa7, 0x52, 0x58, 0x01, 0x5e, 0xbd, 0x39, 0xc9, 0xb5, + 0x6f, 0x60, 0xbd, 0xf2, 0x09, 0x63, 0xe3, 0xf6, 0x93, 0xa1, 0x1b, 0x4c, 0xf8, 0xac, 0xb8, 0x07, + 0x29, 0x1c, 0x25, 0xa5, 0x7a, 0x69, 0x59, 0x9a, 0xa8, 0xa4, 0x54, 0xb9, 0xcd, 0x18, 0x95, 0xdb, + 0xcc, 0x5f, 0x34, 0x68, 0x9e, 0x8b, 0x24, 0x5a, 0xb0, 0x76, 0x14, 0xc7, 0xc3, 0xd0, 0x13, 0x79, + 0x6c, 0x38, 0x39, 0x89, 0x25, 0x86, 0x7f, 0x67, 0x6e, 0x92, 0xc8, 0x5b, 0x57, 0x41, 0x4b, 0xd9, + 0x78, 0x12, 0x46, 0x5c, 0x5e, 0xbb, 0x0a, 0x5a, 0xca, 0x46, 0xfc, 0x86, 0xcf, 0xe4, 0xb5, 0xab, + 0xa0, 0x71, 0xb7, 0x13, 0x9e, 0x24, 0x58, 0x20, 0xa2, 0x13, 0xe5, 0x24, 0xae, 0x72, 0xdc, 0xdb, + 0xa1, 0x9b, 0x25, 0x5c, 0x1e, 0xba, 0x05, 0x6d, 0x73, 0xe8, 0xaa, 0xe1, 0xb1, 0x6f, 0x41, 0x9d, + 0xce, 0x34, 0xf1, 0x20, 0xa3, 0xdc, 0x90, 0x40, 0x14, 0x11, 0xfe, 0xe6, 0xe5, 0xab, 0x97, 0x5f, + 0x59, 0x3f, 0x4f, 0x87, 0x41, 0x19, 0x15, 0xcf, 0xb8, 0x4a, 0x22, 0x7a, 0x60, 0x8d, 0x6f, 0xfd, + 0x74, 0x72, 0x45, 0x9f, 0x81, 0x38, 0x27, 0xe4, 0x3b, 0xc8, 0x3e, 0x80, 0x2d, 0xf9, 0x36, 0xac, + 0xbc, 0x5c, 0xbf, 0xa1, 0x3c, 0x0c, 0x3b, 0xc5, 0x35, 0x57, 0x3c, 0x86, 0xec, 0x0c, 0xb6, 0xab, + 0x6b, 0xe4, 0xdd, 0x7c, 0xd5, 0xa2, 0xf7, 0xf0, 0x9c, 0xbc, 0x85, 0xcd, 0xd3, 0x2c, 0x9e, 0x56, + 0x1d, 0xed, 0x41, 0xcb, 0x0f, 0xdc, 0x49, 0xea, 0xdf, 0x70, 0x59, 0x51, 0x05, 0x4d, 0xef, 0x4c, + 0x5f, 0xbe, 0x85, 0x0d, 0x87, 0xfe, 0x8b, 0x2b, 0xe1, 0x8c, 0xd3, 0xf7, 0x5d, 0x5c, 0x09, 0x05, + 0x4d, 0xae, 0x88, 0x33, 0xbd, 0x2e, 0x5d, 0x21, 0x0a, 0xf1, 0xa3, 0x97, 0x88, 0x78, 0xa9, 0x0d, + 0xc3, 0xe0, 0xd2, 0x9f, 0xe6, 0xf8, 0xdd, 0xc2, 0xe3, 0x05, 0xb2, 0xf7, 0xf6, 0x58, 0x51, 0x46, + 0x0e, 0xf5, 0xca, 0xc8, 0x61, 0xf0, 0x6b, 0x68, 0x8a, 0xc7, 0x3a, 0x5b, 0x87, 0xf6, 0x71, 0x70, + 0xe3, 0xce, 0x7c, 0xef, 0x55, 0x64, 0xd6, 0x58, 0x0b, 0xea, 0xe3, 0x34, 0x8c, 0x4c, 0x8d, 0xb5, + 0xa1, 0x71, 0x8a, 0x75, 0x66, 0xea, 0x0c, 0xa0, 0x89, 0x1f, 0xe1, 0x9c, 0x9b, 0x06, 0xb2, 0x69, + 0x0e, 0x62, 0xd6, 0x91, 0x2d, 0x5e, 0xe8, 0x66, 0x83, 0x6d, 0x00, 0x7c, 0x99, 0xa5, 0xa1, 0x54, + 0x6b, 0x0e, 0x06, 0xd0, 0xa0, 0xf7, 0x2f, 0x19, 0xfc, 0xf9, 0xf1, 0xa9, 0x59, 0x63, 0x1d, 0x58, + 0x73, 0x8e, 0x4e, 0x47, 0x5f, 0x0e, 0x8f, 0x4c, 0x0d, 0xd7, 0x1e, 0xbf, 0xfc, 0xd9, 0xd1, 0xf0, + 0xdc, 0xd4, 0x07, 0xbf, 0x20, 0x93, 0x53, 0x8c, 0xa0, 0x2b, 0x7d, 0x21, 0xda, 0xac, 0xb1, 0x35, + 0x30, 0x5e, 0xf2, 0x5b, 0x53, 0xa3, 0xc5, 0x59, 0x80, 0x8f, 0x11, 0xe1, 0x0f, 0xb9, 0xe6, 0x99, + 0x06, 0x0a, 0xd0, 0xe1, 0x88, 0x7b, 0x66, 0x9d, 0x75, 0xa1, 0xf5, 0x4c, 0xbe, 0x2e, 0xcc, 0xc6, + 0xe0, 0x15, 0xb4, 0xf2, 0x26, 0xca, 0x1e, 0x41, 0x47, 0x9a, 0x46, 0x96, 0x59, 0xc3, 0x38, 0xa8, + 0x55, 0x9a, 0x1a, 0xba, 0x88, 0xed, 0xd0, 0xd4, 0xf1, 0x1f, 0xf6, 0x3c, 0xd3, 0x20, 0xb7, 0xef, + 0x82, 0x89, 0x59, 0x47, 0x45, 0xaa, 0x19, 0xd3, 0x1b, 0xfc, 0x08, 0xda, 0xc5, 0x17, 0x85, 0xce, + 0xbe, 0x0e, 0xae, 0x83, 0xf0, 0x36, 0x20, 0x9e, 0x08, 0xf0, 0xe8, 0x37, 0x1c, 0x8f, 0x10, 0x53, + 0xc3, 0x0d, 0xc9, 0xfe, 0x33, 0x3a, 0xa7, 0x4c, 0x7d, 0x70, 0x02, 0x6b, 0xb2, 0xa2, 0x19, 0x83, + 0x0d, 0xe9, 0x8c, 0xe4, 0x98, 0x35, 0xcc, 0x03, 0xc6, 0x21, 0xb6, 0xd2, 0x10, 0x4f, 0x0a, 0x51, + 0xd0, 0x3a, 0x9a, 0x13, 0xd8, 0x0a, 0x86, 0x71, 0xf0, 0xaf, 0x26, 0x34, 0x45, 0xd9, 0xb0, 0x21, + 0x74, 0xd5, 0xd1, 0x14, 0xfb, 0x50, 0x1e, 0x2f, 0xf7, 0x87, 0x55, 0x3d, 0x8b, 0x0e, 0x88, 0x05, + 0x73, 0x03, 0xbb, 0xc6, 0x8e, 0x61, 0xa3, 0x3a, 0xe6, 0x61, 0x8f, 0x51, 0x7b, 0xe1, 0x0c, 0xa9, + 0xd7, 0x5b, 0x24, 0x2a, 0x4c, 0x1d, 0xc1, 0x7a, 0x65, 0x72, 0xc3, 0x68, 0xdf, 0x45, 0xc3, 0x9c, + 0x95, 0x1e, 0xfd, 0x14, 0x3a, 0xca, 0x20, 0x82, 0xed, 0xa0, 0xea, 0xc3, 0x29, 0x4f, 0xef, 0xc3, + 0x07, 0xfc, 0xc2, 0xc2, 0x17, 0x00, 0xe5, 0x10, 0x80, 0x7d, 0x50, 0x28, 0xaa, 0xc3, 0x9f, 0xde, + 0xce, 0x7d, 0x76, 0xb1, 0xfc, 0x19, 0x80, 0x9c, 0x00, 0x9d, 0x8d, 0x12, 0xf6, 0x11, 0xea, 0x2d, + 0x9b, 0x08, 0xad, 0x0c, 0xe4, 0x04, 0x36, 0x1f, 0x74, 0x50, 0x61, 0x6e, 0x59, 0x63, 0x5d, 0x69, + 0x6e, 0x08, 0x5d, 0xb5, 0x81, 0x8a, 0x74, 0x2f, 0x68, 0xc3, 0xc2, 0xc8, 0xa2, 0x5e, 0x6b, 0xd7, + 0xd8, 0x4f, 0x00, 0xca, 0x76, 0x28, 0xa0, 0x79, 0xd0, 0x1e, 0x57, 0x7a, 0xf1, 0x1c, 0x36, 0x95, + 0x21, 0xa7, 0x68, 0x5d, 0x22, 0x47, 0x0f, 0x67, 0x9f, 0x2b, 0x0d, 0x39, 0x72, 0x22, 0xa7, 0xf6, + 0x40, 0x81, 0xce, 0xb2, 0xb6, 0xd9, 0xfb, 0x78, 0x89, 0x54, 0x85, 0x48, 0x9d, 0xa8, 0x0a, 0x88, + 0x16, 0xcc, 0x58, 0x57, 0x39, 0xf6, 0xd4, 0xfa, 0xc7, 0xdb, 0x5d, 0xed, 0xab, 0xb7, 0xbb, 0xda, + 0xbf, 0xdf, 0xee, 0x6a, 0xbf, 0x7f, 0xb7, 0x5b, 0xfb, 0xea, 0xdd, 0x6e, 0xed, 0x9f, 0xef, 0x76, + 0x6b, 0x17, 0x4d, 0x1a, 0x0b, 0x7f, 0xf7, 0xbf, 0x01, 0x00, 0x00, 0xff, 0xff, 0xd9, 0xe1, 0x12, + 0x36, 0x28, 0x16, 0x00, 0x00, } // Reference imports to suppress errors if they are not otherwise used. @@ -3034,7 +2891,6 @@ type WorkerClient interface { UpdateRelayConfig(ctx context.Context, in *UpdateRelayRequest, opts ...grpc.CallOption) (*CommonWorkerResponse, error) QueryWorkerConfig(ctx context.Context, in *QueryWorkerConfigRequest, opts ...grpc.CallOption) (*QueryWorkerConfigResponse, error) MigrateRelay(ctx context.Context, in *MigrateRelayRequest, opts ...grpc.CallOption) (*CommonWorkerResponse, error) - OperateMysqlWorker(ctx context.Context, in *MysqlWorkerRequest, opts ...grpc.CallOption) (*MysqlWorkerResponse, error) } type workerClient struct { @@ -3153,15 +3009,6 @@ func (c *workerClient) MigrateRelay(ctx context.Context, in *MigrateRelayRequest return out, nil } -func (c *workerClient) OperateMysqlWorker(ctx context.Context, in *MysqlWorkerRequest, opts ...grpc.CallOption) (*MysqlWorkerResponse, error) { - out := new(MysqlWorkerResponse) - err := c.cc.Invoke(ctx, "/pb.Worker/OperateMysqlWorker", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - // WorkerServer is the server API for Worker service. type WorkerServer interface { StartSubTask(context.Context, *StartSubTaskRequest) (*CommonWorkerResponse, error) @@ -3180,7 +3027,6 @@ type WorkerServer interface { UpdateRelayConfig(context.Context, *UpdateRelayRequest) (*CommonWorkerResponse, error) QueryWorkerConfig(context.Context, *QueryWorkerConfigRequest) (*QueryWorkerConfigResponse, error) MigrateRelay(context.Context, *MigrateRelayRequest) (*CommonWorkerResponse, error) - OperateMysqlWorker(context.Context, *MysqlWorkerRequest) (*MysqlWorkerResponse, error) } // UnimplementedWorkerServer can be embedded to have forward compatible implementations. @@ -3223,9 +3069,6 @@ func (*UnimplementedWorkerServer) QueryWorkerConfig(ctx context.Context, req *Qu func (*UnimplementedWorkerServer) MigrateRelay(ctx context.Context, req *MigrateRelayRequest) (*CommonWorkerResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method MigrateRelay not implemented") } -func (*UnimplementedWorkerServer) OperateMysqlWorker(ctx context.Context, req *MysqlWorkerRequest) (*MysqlWorkerResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method OperateMysqlWorker not implemented") -} func RegisterWorkerServer(s *grpc.Server, srv WorkerServer) { s.RegisterService(&_Worker_serviceDesc, srv) @@ -3447,24 +3290,6 @@ func _Worker_MigrateRelay_Handler(srv interface{}, ctx context.Context, dec func return interceptor(ctx, in, info, handler) } -func _Worker_OperateMysqlWorker_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(MysqlWorkerRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(WorkerServer).OperateMysqlWorker(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/pb.Worker/OperateMysqlWorker", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(WorkerServer).OperateMysqlWorker(ctx, req.(*MysqlWorkerRequest)) - } - return interceptor(ctx, in, info, handler) -} - var _Worker_serviceDesc = grpc.ServiceDesc{ ServiceName: "pb.Worker", HandlerType: (*WorkerServer)(nil), @@ -3517,10 +3342,6 @@ var _Worker_serviceDesc = grpc.ServiceDesc{ MethodName: "MigrateRelay", Handler: _Worker_MigrateRelay_Handler, }, - { - MethodName: "OperateMysqlWorker", - Handler: _Worker_OperateMysqlWorker_Handler, - }, }, Streams: []grpc.StreamDesc{}, Metadata: "dmworker.proto", @@ -5412,81 +5233,6 @@ func (m *QueryWorkerConfigResponse) MarshalToSizedBuffer(dAtA []byte) (int, erro return len(dAtA) - i, nil } -func (m *MysqlWorkerRequest) Marshal() (dAtA []byte, err error) { - size := m.Size() - dAtA = make([]byte, size) - n, err := m.MarshalToSizedBuffer(dAtA[:size]) - if err != nil { - return nil, err - } - return dAtA[:n], nil -} - -func (m *MysqlWorkerRequest) MarshalTo(dAtA []byte) (int, error) { - size := m.Size() - return m.MarshalToSizedBuffer(dAtA[:size]) -} - -func (m *MysqlWorkerRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) { - i := len(dAtA) - _ = i - var l int - _ = l - if len(m.Config) > 0 { - i -= len(m.Config) - copy(dAtA[i:], m.Config) - i = encodeVarintDmworker(dAtA, i, uint64(len(m.Config))) - i-- - dAtA[i] = 0x12 - } - if m.Op != 0 { - i = encodeVarintDmworker(dAtA, i, uint64(m.Op)) - i-- - dAtA[i] = 0x8 - } - return len(dAtA) - i, nil -} - -func (m *MysqlWorkerResponse) Marshal() (dAtA []byte, err error) { - size := m.Size() - dAtA = make([]byte, size) - n, err := m.MarshalToSizedBuffer(dAtA[:size]) - if err != nil { - return nil, err - } - return dAtA[:n], nil -} - -func (m *MysqlWorkerResponse) MarshalTo(dAtA []byte) (int, error) { - size := m.Size() - return m.MarshalToSizedBuffer(dAtA[:size]) -} - -func (m *MysqlWorkerResponse) MarshalToSizedBuffer(dAtA []byte) (int, error) { - i := len(dAtA) - _ = i - var l int - _ = l - if len(m.Msg) > 0 { - i -= len(m.Msg) - copy(dAtA[i:], m.Msg) - i = encodeVarintDmworker(dAtA, i, uint64(len(m.Msg))) - i-- - dAtA[i] = 0x12 - } - if m.Result { - i-- - if m.Result { - dAtA[i] = 1 - } else { - dAtA[i] = 0 - } - i-- - dAtA[i] = 0x8 - } - return len(dAtA) - i, nil -} - func encodeVarintDmworker(dAtA []byte, offset int, v uint64) int { offset -= sovDmworker(v) base := offset @@ -6372,38 +6118,6 @@ func (m *QueryWorkerConfigResponse) Size() (n int) { return n } -func (m *MysqlWorkerRequest) Size() (n int) { - if m == nil { - return 0 - } - var l int - _ = l - if m.Op != 0 { - n += 1 + sovDmworker(uint64(m.Op)) - } - l = len(m.Config) - if l > 0 { - n += 1 + l + sovDmworker(uint64(l)) - } - return n -} - -func (m *MysqlWorkerResponse) Size() (n int) { - if m == nil { - return 0 - } - var l int - _ = l - if m.Result { - n += 2 - } - l = len(m.Msg) - if l > 0 { - n += 1 + l + sovDmworker(uint64(l)) - } - return n -} - func sovDmworker(x uint64) (n int) { return (math_bits.Len64(x|1) + 6) / 7 } @@ -11739,215 +11453,6 @@ func (m *QueryWorkerConfigResponse) Unmarshal(dAtA []byte) error { } return nil } -func (m *MysqlWorkerRequest) Unmarshal(dAtA []byte) error { - l := len(dAtA) - iNdEx := 0 - for iNdEx < l { - preIndex := iNdEx - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowDmworker - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - fieldNum := int32(wire >> 3) - wireType := int(wire & 0x7) - if wireType == 4 { - return fmt.Errorf("proto: MysqlWorkerRequest: wiretype end group for non-group") - } - if fieldNum <= 0 { - return fmt.Errorf("proto: MysqlWorkerRequest: illegal tag %d (wire type %d)", fieldNum, wire) - } - switch fieldNum { - case 1: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Op", wireType) - } - m.Op = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowDmworker - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Op |= WorkerOp(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 2: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field Config", wireType) - } - var stringLen uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowDmworker - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - stringLen |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - intStringLen := int(stringLen) - if intStringLen < 0 { - return ErrInvalidLengthDmworker - } - postIndex := iNdEx + intStringLen - if postIndex < 0 { - return ErrInvalidLengthDmworker - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - m.Config = string(dAtA[iNdEx:postIndex]) - iNdEx = postIndex - default: - iNdEx = preIndex - skippy, err := skipDmworker(dAtA[iNdEx:]) - if err != nil { - return err - } - if skippy < 0 { - return ErrInvalidLengthDmworker - } - if (iNdEx + skippy) < 0 { - return ErrInvalidLengthDmworker - } - if (iNdEx + skippy) > l { - return io.ErrUnexpectedEOF - } - iNdEx += skippy - } - } - - if iNdEx > l { - return io.ErrUnexpectedEOF - } - return nil -} -func (m *MysqlWorkerResponse) Unmarshal(dAtA []byte) error { - l := len(dAtA) - iNdEx := 0 - for iNdEx < l { - preIndex := iNdEx - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowDmworker - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - fieldNum := int32(wire >> 3) - wireType := int(wire & 0x7) - if wireType == 4 { - return fmt.Errorf("proto: MysqlWorkerResponse: wiretype end group for non-group") - } - if fieldNum <= 0 { - return fmt.Errorf("proto: MysqlWorkerResponse: illegal tag %d (wire type %d)", fieldNum, wire) - } - switch fieldNum { - case 1: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Result", wireType) - } - var v int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowDmworker - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - v |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - m.Result = bool(v != 0) - case 2: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field Msg", wireType) - } - var stringLen uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowDmworker - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - stringLen |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - intStringLen := int(stringLen) - if intStringLen < 0 { - return ErrInvalidLengthDmworker - } - postIndex := iNdEx + intStringLen - if postIndex < 0 { - return ErrInvalidLengthDmworker - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - m.Msg = string(dAtA[iNdEx:postIndex]) - iNdEx = postIndex - default: - iNdEx = preIndex - skippy, err := skipDmworker(dAtA[iNdEx:]) - if err != nil { - return err - } - if skippy < 0 { - return ErrInvalidLengthDmworker - } - if (iNdEx + skippy) < 0 { - return ErrInvalidLengthDmworker - } - if (iNdEx + skippy) > l { - return io.ErrUnexpectedEOF - } - iNdEx += skippy - } - } - - if iNdEx > l { - return io.ErrUnexpectedEOF - } - return nil -} func skipDmworker(dAtA []byte) (n int, err error) { l := len(dAtA) iNdEx := 0 diff --git a/dm/pbmock/dmmaster.go b/dm/pbmock/dmmaster.go index 4dcf853a25..c181f699dc 100644 --- a/dm/pbmock/dmmaster.go +++ b/dm/pbmock/dmmaster.go @@ -135,24 +135,24 @@ func (mr *MockMasterClientMockRecorder) OfflineWorker(arg0, arg1 interface{}, ar return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "OfflineWorker", reflect.TypeOf((*MockMasterClient)(nil).OfflineWorker), varargs...) } -// OperateMysqlWorker mocks base method -func (m *MockMasterClient) OperateMysqlWorker(arg0 context.Context, arg1 *pb.MysqlWorkerRequest, arg2 ...grpc.CallOption) (*pb.MysqlWorkerResponse, error) { +// OperateSource mocks base method +func (m *MockMasterClient) OperateSource(arg0 context.Context, arg1 *pb.OperateSourceRequest, arg2 ...grpc.CallOption) (*pb.OperateSourceResponse, error) { m.ctrl.T.Helper() varargs := []interface{}{arg0, arg1} for _, a := range arg2 { varargs = append(varargs, a) } - ret := m.ctrl.Call(m, "OperateMysqlWorker", varargs...) - ret0, _ := ret[0].(*pb.MysqlWorkerResponse) + ret := m.ctrl.Call(m, "OperateSource", varargs...) + ret0, _ := ret[0].(*pb.OperateSourceResponse) ret1, _ := ret[1].(error) return ret0, ret1 } -// OperateMysqlWorker indicates an expected call of OperateMysqlWorker -func (mr *MockMasterClientMockRecorder) OperateMysqlWorker(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { +// OperateSource indicates an expected call of OperateSource +func (mr *MockMasterClientMockRecorder) OperateSource(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() varargs := append([]interface{}{arg0, arg1}, arg2...) - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "OperateMysqlWorker", reflect.TypeOf((*MockMasterClient)(nil).OperateMysqlWorker), varargs...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "OperateSource", reflect.TypeOf((*MockMasterClient)(nil).OperateSource), varargs...) } // OperateTask mocks base method @@ -513,19 +513,19 @@ func (mr *MockMasterServerMockRecorder) OfflineWorker(arg0, arg1 interface{}) *g return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "OfflineWorker", reflect.TypeOf((*MockMasterServer)(nil).OfflineWorker), arg0, arg1) } -// OperateMysqlWorker mocks base method -func (m *MockMasterServer) OperateMysqlWorker(arg0 context.Context, arg1 *pb.MysqlWorkerRequest) (*pb.MysqlWorkerResponse, error) { +// OperateSource mocks base method +func (m *MockMasterServer) OperateSource(arg0 context.Context, arg1 *pb.OperateSourceRequest) (*pb.OperateSourceResponse, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "OperateMysqlWorker", arg0, arg1) - ret0, _ := ret[0].(*pb.MysqlWorkerResponse) + ret := m.ctrl.Call(m, "OperateSource", arg0, arg1) + ret0, _ := ret[0].(*pb.OperateSourceResponse) ret1, _ := ret[1].(error) return ret0, ret1 } -// OperateMysqlWorker indicates an expected call of OperateMysqlWorker -func (mr *MockMasterServerMockRecorder) OperateMysqlWorker(arg0, arg1 interface{}) *gomock.Call { +// OperateSource indicates an expected call of OperateSource +func (mr *MockMasterServerMockRecorder) OperateSource(arg0, arg1 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "OperateMysqlWorker", reflect.TypeOf((*MockMasterServer)(nil).OperateMysqlWorker), arg0, arg1) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "OperateSource", reflect.TypeOf((*MockMasterServer)(nil).OperateSource), arg0, arg1) } // OperateTask mocks base method diff --git a/dm/pbmock/dmworker.go b/dm/pbmock/dmworker.go index caa5087a85..545dd2a735 100644 --- a/dm/pbmock/dmworker.go +++ b/dm/pbmock/dmworker.go @@ -75,26 +75,6 @@ func (mr *MockWorkerClientMockRecorder) MigrateRelay(arg0, arg1 interface{}, arg return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "MigrateRelay", reflect.TypeOf((*MockWorkerClient)(nil).MigrateRelay), varargs...) } -// OperateMysqlWorker mocks base method -func (m *MockWorkerClient) OperateMysqlWorker(arg0 context.Context, arg1 *pb.MysqlWorkerRequest, arg2 ...grpc.CallOption) (*pb.MysqlWorkerResponse, error) { - m.ctrl.T.Helper() - varargs := []interface{}{arg0, arg1} - for _, a := range arg2 { - varargs = append(varargs, a) - } - ret := m.ctrl.Call(m, "OperateMysqlWorker", varargs...) - ret0, _ := ret[0].(*pb.MysqlWorkerResponse) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// OperateMysqlWorker indicates an expected call of OperateMysqlWorker -func (mr *MockWorkerClientMockRecorder) OperateMysqlWorker(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - varargs := append([]interface{}{arg0, arg1}, arg2...) - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "OperateMysqlWorker", reflect.TypeOf((*MockWorkerClient)(nil).OperateMysqlWorker), varargs...) -} - // OperateRelay mocks base method func (m *MockWorkerClient) OperateRelay(arg0 context.Context, arg1 *pb.OperateRelayRequest, arg2 ...grpc.CallOption) (*pb.OperateRelayResponse, error) { m.ctrl.T.Helper() @@ -348,21 +328,6 @@ func (mr *MockWorkerServerMockRecorder) MigrateRelay(arg0, arg1 interface{}) *go return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "MigrateRelay", reflect.TypeOf((*MockWorkerServer)(nil).MigrateRelay), arg0, arg1) } -// OperateMysqlWorker mocks base method -func (m *MockWorkerServer) OperateMysqlWorker(arg0 context.Context, arg1 *pb.MysqlWorkerRequest) (*pb.MysqlWorkerResponse, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "OperateMysqlWorker", arg0, arg1) - ret0, _ := ret[0].(*pb.MysqlWorkerResponse) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// OperateMysqlWorker indicates an expected call of OperateMysqlWorker -func (mr *MockWorkerServerMockRecorder) OperateMysqlWorker(arg0, arg1 interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "OperateMysqlWorker", reflect.TypeOf((*MockWorkerServer)(nil).OperateMysqlWorker), arg0, arg1) -} - // OperateRelay mocks base method func (m *MockWorkerServer) OperateRelay(arg0 context.Context, arg1 *pb.OperateRelayRequest) (*pb.OperateRelayResponse, error) { m.ctrl.T.Helper() diff --git a/dm/proto/dmmaster.proto b/dm/proto/dmmaster.proto index 3f2afe5898..69d4efbc2b 100644 --- a/dm/proto/dmmaster.proto +++ b/dm/proto/dmmaster.proto @@ -58,8 +58,8 @@ service Master { // CheckTask checks legality of task configuration rpc CheckTask(CheckTaskRequest) returns (CheckTaskResponse) {} - // Operate mysql-worker for server - rpc OperateMysqlWorker(MysqlWorkerRequest) returns (MysqlWorkerResponse) {} + // Operate an upstream MySQL source. + rpc OperateSource(OperateSourceRequest) returns (OperateSourceResponse) {} // RegisterWorker register the dm-workers. rpc RegisterWorker(RegisterWorkerRequest) returns(RegisterWorkerResponse) {} @@ -296,6 +296,23 @@ message CheckTaskResponse { string msg = 2; } +enum SourceOp { + InvalidSourceOp = 0; + StartSource = 1; + UpdateSource = 2; + StopSource = 3; +} + +message OperateSourceRequest { + SourceOp op = 1; + string config = 2; +} + +message OperateSourceResponse { + bool result = 1; + string msg = 2; +} + message RegisterWorkerRequest { string name = 1; string address = 2; diff --git a/dm/proto/dmworker.proto b/dm/proto/dmworker.proto index dfec20796c..aa88cfba26 100644 --- a/dm/proto/dmworker.proto +++ b/dm/proto/dmworker.proto @@ -28,8 +28,6 @@ service Worker { rpc QueryWorkerConfig (QueryWorkerConfigRequest) returns (QueryWorkerConfigResponse) {} rpc MigrateRelay(MigrateRelayRequest) returns (CommonWorkerResponse) {} - - rpc OperateMysqlWorker(MysqlWorkerRequest) returns (MysqlWorkerResponse) {} } message StartSubTaskRequest { @@ -397,20 +395,3 @@ message QueryWorkerConfigResponse { string msg = 3; // when result is true, msg is empty string content = 4; // marshaled config content } - -enum WorkerOp { - InvalidWorkerOp = 0; - StartWorker = 1; - UpdateConfig = 2; - StopWorker = 3; -} - -message MysqlWorkerRequest { - WorkerOp op = 1; - string config = 2; -} - -message MysqlWorkerResponse { - bool result = 1; - string msg = 2; -} diff --git a/dm/worker/config.go b/dm/worker/config.go index bb9c8bec91..42ad05719f 100644 --- a/dm/worker/config.go +++ b/dm/worker/config.go @@ -32,6 +32,7 @@ import ( // later we can read it from dm/worker/dm-worker.toml // and assign it to SampleConfigFile while we build dm-worker var SampleConfigFile string +var defaultKeepAliveTTL = int64(3) var ( getRandomServerIDFunc = utils.GetRandomServerID @@ -54,6 +55,7 @@ func NewConfig() *Config { // NOTE: add `advertise-addr` for dm-master if needed. fs.StringVar(&cfg.Join, "join", "", `join to an existing cluster (usage: dm-master cluster's "${master-addr}")`) fs.StringVar(&cfg.Name, "name", "", "human-readable name for DM-worker member") + fs.Int64Var(&cfg.KeepAliveTTL, "keepalive-ttl", defaultKeepAliveTTL, "dm-worker's TTL for keepalive with etcd (in seconds)") return cfg } @@ -71,6 +73,8 @@ type Config struct { AdvertiseAddr string `toml:"advertise-addr" json:"advertise-addr"` ConfigFile string `json:"config-file"` + // TODO: in the future dm-workers should share a same ttl from dm-master + KeepAliveTTL int64 `toml:"keepalive-ttl" json:"keepalive-ttl"` printVersion bool printSampleConfig bool diff --git a/dm/worker/join.go b/dm/worker/join.go index 633a310391..ee76477412 100644 --- a/dm/worker/join.go +++ b/dm/worker/join.go @@ -15,16 +15,16 @@ package worker import ( "context" - "go.uber.org/zap" "strings" "time" - "github.com/pingcap/dm/dm/common" - "github.com/pingcap/dm/dm/pb" - "github.com/pingcap/dm/pkg/log" "github.com/pingcap/errors" - "go.etcd.io/etcd/clientv3" + "go.uber.org/zap" "google.golang.org/grpc" + + "github.com/pingcap/dm/dm/pb" + "github.com/pingcap/dm/pkg/ha" + "github.com/pingcap/dm/pkg/log" ) // GetJoinURLs gets the endpoints from the join address. @@ -64,46 +64,19 @@ func (s *Server) JoinMaster(endpoints []string) error { return nil } -var ( - defaultKeepAliveTTL = int64(3) - revokeLeaseTimeout = time.Second -) - // KeepAlive attempts to keep the lease of the server alive forever. -func (s *Server) KeepAlive() (bool, error) { - // TODO: fetch the actual master endpoints, the master member maybe changed. - cliCtx, canc := context.WithTimeout(s.ctx, revokeLeaseTimeout) - defer canc() - lease, err := s.etcdClient.Grant(cliCtx, defaultKeepAliveTTL) - if err != nil { - return false, err - } - k := common.WorkerKeepAliveKeyAdapter.Encode(s.cfg.AdvertiseAddr, s.cfg.Name) - _, err = s.etcdClient.Put(cliCtx, k, time.Now().String(), clientv3.WithLease(lease.ID)) - if err != nil { - return false, err - } - ch, err := s.etcdClient.KeepAlive(s.ctx, lease.ID) - if err != nil { - return false, err - } - log.L().Info("keepalive", zap.String("to-master", s.cfg.Join)) - // set retryConnectMaster as long as it connects success, for next retry - s.retryConnectMaster.Set(true) +func (s *Server) KeepAlive() { for { + log.L().Info("start to keepalive with master") + err1 := ha.KeepAlive(s.ctx, s.etcdClient, s.cfg.Name, s.cfg.KeepAliveTTL) + log.L().Warn("keepalive with master goroutine paused", zap.Error(err1)) + s.stopWorker("") select { - case _, ok := <-ch: - if !ok { - log.L().Info("keep alive channel is closed") - return false, nil - } case <-s.ctx.Done(): - log.L().Info("server is closing, exits keepalive") - ctx, cancel := context.WithTimeout(s.etcdClient.Ctx(), revokeLeaseTimeout) - defer cancel() - s.etcdClient.Revoke(ctx, lease.ID) - return true, nil + log.L().Info("keepalive with master goroutine exited!") + return + case <-time.After(retryConnectSleepTime): + // Try to connect master again } } - } diff --git a/dm/worker/relay.go b/dm/worker/relay.go index 454854178e..24736ef6b3 100644 --- a/dm/worker/relay.go +++ b/dm/worker/relay.go @@ -15,13 +15,13 @@ package worker import ( "context" - "github.com/pingcap/dm/dm/config" "sync" "github.com/pingcap/errors" "github.com/siddontang/go/sync2" "go.uber.org/zap" + "github.com/pingcap/dm/dm/config" "github.com/pingcap/dm/dm/pb" "github.com/pingcap/dm/dm/unit" "github.com/pingcap/dm/pkg/log" @@ -53,7 +53,7 @@ type RelayHolder interface { // Result returns the result of the relay Result() *pb.ProcessResult // Update updates relay config online - Update(ctx context.Context, cfg *config.MysqlConfig) error + Update(ctx context.Context, cfg *config.SourceConfig) error // Migrate resets binlog name and binlog position for relay unit Migrate(ctx context.Context, binlogName string, binlogPos uint32) error } @@ -68,7 +68,7 @@ type realRelayHolder struct { wg sync.WaitGroup relay relay.Process - cfg *config.MysqlConfig + cfg *config.SourceConfig ctx context.Context cancel context.CancelFunc @@ -81,7 +81,7 @@ type realRelayHolder struct { } // NewRealRelayHolder creates a new RelayHolder -func NewRealRelayHolder(cfg *config.MysqlConfig) RelayHolder { +func NewRealRelayHolder(cfg *config.SourceConfig) RelayHolder { clone, _ := cfg.DecryptPassword() relayCfg := &relay.Config{ EnableGTID: clone.EnableGTID, @@ -321,7 +321,7 @@ func (h *realRelayHolder) Result() *pb.ProcessResult { } // Update update relay config online -func (h *realRelayHolder) Update(ctx context.Context, cfg *config.MysqlConfig) error { +func (h *realRelayHolder) Update(ctx context.Context, cfg *config.SourceConfig) error { relayCfg := &relay.Config{ AutoFixGTID: cfg.AutoFixGTID, Charset: cfg.Charset, @@ -382,20 +382,23 @@ func (h *realRelayHolder) Migrate(ctx context.Context, binlogName string, binlog /******************** dummy relay holder ********************/ type dummyRelayHolder struct { + sync.RWMutex initError error + stage pb.Stage - cfg *config.MysqlConfig + cfg *config.SourceConfig } // NewDummyRelayHolder creates a new RelayHolder -func NewDummyRelayHolder(cfg *config.MysqlConfig) RelayHolder { +func NewDummyRelayHolder(cfg *config.SourceConfig) RelayHolder { return &dummyRelayHolder{ - cfg: cfg, + cfg: cfg, + stage: pb.Stage_New, } } // NewDummyRelayHolderWithInitError creates a new RelayHolder with init error -func NewDummyRelayHolderWithInitError(cfg *config.MysqlConfig) RelayHolder { +func NewDummyRelayHolderWithInitError(cfg *config.SourceConfig) RelayHolder { return &dummyRelayHolder{ initError: errors.New("init error"), cfg: cfg, @@ -413,14 +416,24 @@ func (d *dummyRelayHolder) Init(interceptors []purger.PurgeInterceptor) (purger. } // Start implements interface of RelayHolder -func (d *dummyRelayHolder) Start() {} +func (d *dummyRelayHolder) Start() { + d.Lock() + defer d.Unlock() + d.stage = pb.Stage_Running +} // Close implements interface of RelayHolder -func (d *dummyRelayHolder) Close() {} +func (d *dummyRelayHolder) Close() { + d.Lock() + defer d.Unlock() + d.stage = pb.Stage_Stopped +} // Status implements interface of RelayHolder func (d *dummyRelayHolder) Status() *pb.RelayStatus { - return nil + d.Lock() + defer d.Unlock() + return &pb.RelayStatus{Stage: d.stage} } // Error implements interface of RelayHolder @@ -435,6 +448,25 @@ func (d *dummyRelayHolder) SwitchMaster(ctx context.Context, req *pb.SwitchRelay // Operate implements interface of RelayHolder func (d *dummyRelayHolder) Operate(ctx context.Context, req *pb.OperateRelayRequest) error { + d.Lock() + defer d.Unlock() + switch req.Op { + case pb.RelayOp_PauseRelay: + if d.stage != pb.Stage_Running { + return terror.ErrWorkerRelayStageNotValid.Generate(d.stage, pb.Stage_Running, req.Op) + } + d.stage = pb.Stage_Paused + case pb.RelayOp_ResumeRelay: + if d.stage != pb.Stage_Paused { + return terror.ErrWorkerRelayStageNotValid.Generate(d.stage, pb.Stage_Paused, req.Op) + } + d.stage = pb.Stage_Running + case pb.RelayOp_StopRelay: + if d.stage == pb.Stage_Stopped { + return terror.ErrWorkerRelayStageNotValid.Generatef("current stage is already stopped not valid, relayop %s", req.Op) + } + d.stage = pb.Stage_Stopped + } return nil } @@ -444,7 +476,7 @@ func (d *dummyRelayHolder) Result() *pb.ProcessResult { } // Update implements interface of RelayHolder -func (d *dummyRelayHolder) Update(ctx context.Context, cfg *config.MysqlConfig) error { +func (d *dummyRelayHolder) Update(ctx context.Context, cfg *config.SourceConfig) error { return nil } @@ -458,5 +490,7 @@ func (d *dummyRelayHolder) EarliestActiveRelayLog() *streamer.RelayLogInfo { } func (d *dummyRelayHolder) Stage() pb.Stage { - return pb.Stage_Running + d.Lock() + defer d.Unlock() + return d.stage } diff --git a/dm/worker/relay_test.go b/dm/worker/relay_test.go index 350fc159cd..a9857fe261 100644 --- a/dm/worker/relay_test.go +++ b/dm/worker/relay_test.go @@ -134,14 +134,13 @@ func (t *testRelay) TestRelay(c *C) { purger.NewPurger = originNewPurger }() - cfg := &config.MysqlConfig{} - c.Assert(cfg.LoadFromFile("./dm-mysql.toml"), IsNil) + cfg := loadSourceConfigWithoutPassword(c) dir := c.MkDir() cfg.RelayDir = dir cfg.MetaDir = dir - relayHolder := NewRealRelayHolder(cfg) + relayHolder := NewRealRelayHolder(&cfg) c.Assert(relayHolder, NotNil) holder, ok := relayHolder.(*realRelayHolder) @@ -271,7 +270,7 @@ func (t *testRelay) testPauseAndResume(c *C, holder *realRelayHolder) { } func (t *testRelay) testUpdate(c *C, holder *realRelayHolder) { - cfg := &config.MysqlConfig{ + cfg := &config.SourceConfig{ From: config.DBConfig{ Host: "127.0.0.1", Port: 3306, diff --git a/dm/worker/server.go b/dm/worker/server.go index 01211e92f2..ee7e68b48e 100644 --- a/dm/worker/server.go +++ b/dm/worker/server.go @@ -24,8 +24,10 @@ import ( "github.com/pingcap/dm/dm/pb" "github.com/pingcap/dm/pkg/binlog" tcontext "github.com/pingcap/dm/pkg/context" + "github.com/pingcap/dm/pkg/ha" "github.com/pingcap/dm/pkg/log" "github.com/pingcap/dm/pkg/terror" + "github.com/pingcap/dm/pkg/utils" "github.com/pingcap/dm/syncer" "github.com/pingcap/errors" @@ -42,7 +44,7 @@ var ( dialTimeout = 3 * time.Second keepaliveTimeout = 3 * time.Second keepaliveTime = 3 * time.Second - retryConnectSleepTime = 2 * time.Second + retryConnectSleepTime = time.Second getMinPosForSubTaskFunc = getMinPosForSubTask ) @@ -61,10 +63,8 @@ type Server struct { rootLis net.Listener svr *grpc.Server worker *Worker + workerErr error etcdClient *clientv3.Client - - // false: has retried connecting to master again. - retryConnectMaster sync2.AtomicBool } // NewServer creates a new Server @@ -72,7 +72,6 @@ func NewServer(cfg *Config) *Server { s := Server{ cfg: cfg, } - s.retryConnectMaster.Set(true) s.closed.Set(true) // not start yet return &s } @@ -97,42 +96,39 @@ func (s *Server) Start() error { if err != nil { return err } + + bsm, revBound, err := ha.GetSourceBound(s.etcdClient, s.cfg.Name) + if err != nil { + // TODO: need retry + return err + } + if bound, ok := bsm[s.cfg.Name]; ok { + log.L().Warn("worker has been assigned source before keepalive") + err = s.operateSourceBound(bound) + if err != nil { + s.setWorkerErr(err, true) + log.L().Error("fail to operate sourceBound on worker", zap.String("worker", s.cfg.Name), + zap.String("source", bound.Source)) + } + } + sourceBoundCh := make(chan ha.SourceBound, 10) + sourceBoundErrCh := make(chan error, 10) + s.wg.Add(2) + go func() { + defer s.wg.Done() + ha.WatchSourceBound(s.ctx, s.etcdClient, s.cfg.Name, revBound+1, sourceBoundCh, sourceBoundErrCh) + }() + go func() { + defer s.wg.Done() + s.handleSourceBound(s.ctx, sourceBoundCh, sourceBoundErrCh) + }() + s.wg.Add(1) go func() { defer s.wg.Done() // worker keepalive with master // If worker loses connect from master, it would stop all task and try to connect master again. - shouldExit := false - var err1 error - for !shouldExit { - shouldExit, err1 = s.KeepAlive() - log.L().Warn("keepalive with master goroutine paused", zap.Error(err1)) - if err1 != nil || !shouldExit { - if s.retryConnectMaster.Get() { - // Try to connect master again before stop worker - s.retryConnectMaster.Set(false) - } else { - s.Lock() - w := s.getWorker(false) - if w != nil { - s.setWorker(nil, false) - s.Unlock() - w.Close() - } else { - s.Unlock() - } - } - select { - case <-s.ctx.Done(): - shouldExit = true - break - case <-time.After(retryConnectSleepTime): - // Try to connect master again - break - } - } - } - log.L().Info("keepalive with master goroutine exited!") + s.KeepAlive() }() // create a cmux @@ -212,6 +208,24 @@ func (s *Server) setWorker(worker *Worker, needLock bool) { s.worker = worker } +func (s *Server) getWorkerErr(needLock bool) error { + if needLock { + s.Lock() + defer s.Unlock() + } + return s.workerErr +} + +func (s *Server) setWorkerErr(workerErr error, needLock bool) { + if needLock { + s.Lock() + defer s.Unlock() + } + s.workerErr = workerErr +} + +// if sourceID is set to "", worker will be closed directly +// if sourceID is not "", we will check sourceID with w.cfg.SourceID func (s *Server) stopWorker(sourceID string) error { s.Lock() w := s.getWorker(false) @@ -219,7 +233,7 @@ func (s *Server) stopWorker(sourceID string) error { s.Unlock() return terror.ErrWorkerNoStart } - if w.cfg.SourceID != sourceID { + if sourceID != "" && w.cfg.SourceID != sourceID { s.Unlock() return terror.ErrWorkerSourceNotMatch } @@ -249,6 +263,40 @@ func (s *Server) retryWriteEctd(ops ...clientv3.Op) string { } } +func (s *Server) handleSourceBound(ctx context.Context, boundCh chan ha.SourceBound, errCh chan error) { + for { + select { + case <-ctx.Done(): + log.L().Info("worker server is closed, handleSourceBound will quit now") + return + case bound := <-boundCh: + err := s.operateSourceBound(bound) + s.setWorkerErr(err, true) + if err != nil { + // record the reason for operating source bound + // TODO: add better metrics + log.L().Error("fail to operate sourceBound on worker", zap.String("worker", s.cfg.Name), + zap.String("source", bound.Source), zap.Error(err)) + } + case err := <-errCh: + // TODO: Deal with err + log.L().Error("WatchSourceBound received an error", zap.Error(err)) + } + } +} + +func (s *Server) operateSourceBound(bound ha.SourceBound) error { + if bound.IsDeleted { + return s.stopWorker(bound.Source) + } + sourceCfg, _, err := ha.GetSourceCfg(s.etcdClient, bound.Source, bound.Revision) + if err != nil { + // TODO: need retry + return err + } + return s.startWorker(&sourceCfg) +} + // StartSubTask implements WorkerServer.StartSubTask func (s *Server) StartSubTask(ctx context.Context, req *pb.StartSubTaskRequest) (*pb.CommonWorkerResponse, error) { log.L().Info("", zap.String("request", "StartSubTask"), zap.Stringer("payload", req)) @@ -276,14 +324,7 @@ func (s *Server) StartSubTask(ctx context.Context, req *pb.StartSubTaskRequest) cfg.LogLevel = s.cfg.LogLevel cfg.LogFile = s.cfg.LogFile - err = w.StartSubTask(cfg) - - if err != nil { - err = terror.Annotatef(err, "start sub task %s", cfg.Name) - log.L().Error("fail to start subtask", zap.String("request", "StartSubTask"), zap.Stringer("payload", req), zap.Error(err)) - resp.Result = false - resp.Msg = err.Error() - } + w.StartSubTask(cfg) if resp.Result { op1 := clientv3.OpPut(common.UpstreamSubTaskKeyAdapter.Encode(cfg.SourceID, cfg.Name), req.Task) @@ -376,6 +417,9 @@ func (s *Server) QueryStatus(ctx context.Context, req *pb.QueryStatusRequest) (* log.L().Error("fail to call QueryStatus, because mysql worker has not been started") resp.Result = false resp.Msg = terror.ErrWorkerNoStart.Error() + if err := s.getWorkerErr(true); err != nil { + resp.Msg += "\nlast operate source worker error is: " + err.Error() + } return resp, nil } @@ -384,8 +428,8 @@ func (s *Server) QueryStatus(ctx context.Context, req *pb.QueryStatusRequest) (* Detail: []byte("relay is not enabled"), }, } - if s.worker.relayHolder != nil { - relayStatus = s.worker.relayHolder.Status() + if w.relayHolder != nil { + relayStatus = w.relayHolder.Status() } resp.SubTaskStatus = w.QueryStatus(req.Name) @@ -409,6 +453,9 @@ func (s *Server) QueryError(ctx context.Context, req *pb.QueryErrorRequest) (*pb log.L().Error("fail to call StartSubTask, because mysql worker has not been started") resp.Result = false resp.Msg = terror.ErrWorkerNoStart.Error() + if err := s.getWorkerErr(true); err != nil { + resp.Msg += "\nlast operate source worker error is: " + err.Error() + } return resp, nil } @@ -568,7 +615,7 @@ func (s *Server) MigrateRelay(ctx context.Context, req *pb.MigrateRelayRequest) return makeCommonWorkerResponse(err), nil } -func (s *Server) startWorker(cfg *config.MysqlConfig) error { +func (s *Server) startWorker(cfg *config.SourceConfig) error { s.Lock() defer s.Unlock() if w := s.getWorker(false); w != nil { @@ -580,25 +627,25 @@ func (s *Server) startWorker(cfg *config.MysqlConfig) error { return terror.ErrWorkerAlreadyStart.Generate() } - subTaskCfgs := make([]*config.SubTaskConfig, 0, 3) - - ectx, ecancel := context.WithTimeout(s.etcdClient.Ctx(), time.Second*3) - defer ecancel() - key := common.UpstreamSubTaskKeyAdapter.Encode(cfg.SourceID) - resp, err := s.etcdClient.KV.Get(ectx, key, clientv3.WithPrefix()) + // we get the newest subtask stages directly which will omit the subtask stage PUT/DELETE event + // because triggering these events is useless now + subTaskStages, revSubTask, err := ha.GetSubTaskStage(s.etcdClient, cfg.SourceID, "") if err != nil { + // TODO: need retry return err } - for _, kv := range resp.Kvs { - task := string(kv.Value) - subTaskcfg := config.NewSubTaskConfig() - if err = subTaskcfg.Decode(task); err != nil { - return err - } - subTaskcfg.LogLevel = s.cfg.LogLevel - subTaskcfg.LogFile = s.cfg.LogFile + subTaskCfgm, _, err := ha.GetSubTaskCfg(s.etcdClient, cfg.SourceID, "", revSubTask) + if err != nil { + // TODO: need retry + return err + } + + subTaskCfgs := make([]*config.SubTaskConfig, 0, len(subTaskCfgm)) + for _, subTaskCfg := range subTaskCfgm { + subTaskCfg.LogLevel = s.cfg.LogLevel + subTaskCfg.LogFile = s.cfg.LogFile - subTaskCfgs = append(subTaskCfgs, subTaskcfg) + subTaskCfgs = append(subTaskCfgs, &subTaskCfg) } dctx, dcancel := context.WithTimeout(s.etcdClient.Ctx(), time.Duration(len(subTaskCfgs))*3*time.Second) @@ -615,77 +662,80 @@ func (s *Server) startWorker(cfg *config.MysqlConfig) error { cfg.RelayBinlogGTID = "" } - log.L().Info("start workers", zap.Reflect("subTasks", subTaskCfgs)) + log.L().Info("start worker", zap.String("sourceCfg", cfg.String()), zap.Reflect("subTasks", subTaskCfgs)) w, err := NewWorker(cfg, s.etcdClient) if err != nil { return err } s.setWorker(w, false) + + startRelay := false + var revRelay int64 + if cfg.EnableRelay { + // TODO: if the sourceID is not changed and relay log is not too old, don't purge relay dir + err = w.purgeRelayDir() + if err != nil { + return err + } + var relayStage ha.Stage + // we get the newest relay stages directly which will omit the relay stage PUT/DELETE event + // because triggering these events is useless now + relayStage, revRelay, err = ha.GetRelayStage(s.etcdClient, cfg.SourceID) + if err != nil { + // TODO: need retry + return err + } + startRelay = !relayStage.IsDeleted && relayStage.Expect == pb.Stage_Running + } go func() { - w.Start() + w.Start(startRelay) }() - // FIXME: worker's closed will be set to false in Start. - // when start sub task, will check the `closed`, if closed is true, will ignore start subTask - // just sleep and make test success, will refine this later - time.Sleep(1 * time.Second) + isStarted := utils.WaitSomething(50, 100*time.Millisecond, func() bool { + return w.closed.Get() == closedFalse + }) + if !isStarted { + // TODO: add more mechanism to wait + return terror.ErrWorkerNoStart + } for _, subTaskCfg := range subTaskCfgs { - if err = w.StartSubTask(subTaskCfg); err != nil { - return err + expectStage := subTaskStages[subTaskCfg.Name] + if expectStage.IsDeleted || expectStage.Expect != pb.Stage_Running { + continue } + w.StartSubTask(subTaskCfg) log.L().Info("load subtask successful", zap.String("sourceID", subTaskCfg.SourceID), zap.String("task", subTaskCfg.Name)) } - return nil -} + subTaskStageCh := make(chan ha.Stage, 10) + subTaskErrCh := make(chan error, 10) + w.wg.Add(2) + go func() { + defer w.wg.Done() + ha.WatchSubTaskStage(w.ctx, s.etcdClient, cfg.SourceID, revSubTask+1, subTaskStageCh, subTaskErrCh) + }() + go func() { + defer w.wg.Done() + w.handleSubTaskStage(w.ctx, subTaskStageCh, subTaskErrCh) + }() -// OperateMysqlWorker create a new mysql task which will be running in this Server -func (s *Server) OperateMysqlWorker(ctx context.Context, req *pb.MysqlWorkerRequest) (*pb.MysqlWorkerResponse, error) { - log.L().Info("", zap.String("request", "OperateMysqlWorker"), zap.Stringer("payload", req)) - resp := &pb.MysqlWorkerResponse{ - Result: true, - Msg: "Operate mysql task successfully", - } - cfg := config.NewMysqlConfig() - err := cfg.Parse(req.Config) - if err != nil { - resp.Result = false - resp.Msg = errors.ErrorStack(err) - return resp, nil - } - if req.Op == pb.WorkerOp_UpdateConfig { - if err = s.stopWorker(cfg.SourceID); err != nil { - resp.Result = false - resp.Msg = errors.ErrorStack(err) - return resp, nil - } - } else if req.Op == pb.WorkerOp_StopWorker { - if err = s.stopWorker(cfg.SourceID); err == terror.ErrWorkerSourceNotMatch { - resp.Result = false - resp.Msg = errors.ErrorStack(err) - } + if cfg.EnableRelay { + relayStageCh := make(chan ha.Stage, 10) + relayErrCh := make(chan error, 10) + w.wg.Add(2) + go func() { + defer w.wg.Done() + ha.WatchRelayStage(w.ctx, s.etcdClient, cfg.SourceID, revRelay+1, relayStageCh, relayErrCh) + }() + go func() { + defer w.wg.Done() + w.handleRelayStage(w.ctx, relayStageCh, relayErrCh) + }() } - if resp.Result && (req.Op == pb.WorkerOp_UpdateConfig || req.Op == pb.WorkerOp_StartWorker) { - err = s.startWorker(cfg) - } - if err != nil { - resp.Result = false - resp.Msg = errors.ErrorStack(err) - } - if resp.Result { - op1 := clientv3.OpPut(common.UpstreamConfigKeyAdapter.Encode(cfg.SourceID), req.Config) - op2 := clientv3.OpPut(common.UpstreamBoundWorkerKeyAdapter.Encode(s.cfg.AdvertiseAddr), cfg.SourceID) - if req.Op == pb.WorkerOp_StopWorker { - op1 = clientv3.OpDelete(common.UpstreamConfigKeyAdapter.Encode(cfg.SourceID)) - op2 = clientv3.OpDelete(common.UpstreamBoundWorkerKeyAdapter.Encode(s.cfg.AdvertiseAddr)) - } - resp.Msg = s.retryWriteEctd(op1, op2) - // Because etcd was deployed with master in a single process, if we can not write data into etcd, most probably - // the have lost connect from master. - } - return resp, nil + + return nil } func makeCommonWorkerResponse(reqErr error) *pb.CommonWorkerResponse { diff --git a/dm/worker/server_test.go b/dm/worker/server_test.go index cb62229b3b..878fac8e2e 100644 --- a/dm/worker/server_test.go +++ b/dm/worker/server_test.go @@ -15,7 +15,6 @@ package worker import ( "context" - "fmt" "io/ioutil" "net/http" "net/url" @@ -29,14 +28,17 @@ import ( "go.etcd.io/etcd/embed" "google.golang.org/grpc" - "github.com/pingcap/dm/dm/common" "github.com/pingcap/dm/dm/config" "github.com/pingcap/dm/dm/pb" + "github.com/pingcap/dm/dm/unit" + "github.com/pingcap/dm/pkg/ha" "github.com/pingcap/dm/pkg/log" "github.com/pingcap/dm/pkg/terror" "github.com/pingcap/dm/pkg/utils" ) +var mysqlCfgDir = "./source.toml" + func TestServer(t *testing.T) { TestingT(t) } @@ -94,6 +96,23 @@ func (t *testServer) TestServer(c *C) { defer func() { NewRelayHolder = NewRealRelayHolder }() + NewSubTask = func(cfg *config.SubTaskConfig, etcdClient *clientv3.Client) *SubTask { + cfg.UseRelay = false + return NewRealSubTask(cfg, etcdClient) + } + defer func() { + NewSubTask = NewRealSubTask + }() + + createUnits = func(cfg *config.SubTaskConfig, etcdClient *clientv3.Client) []unit.Unit { + mockDumper := NewMockUnit(pb.UnitType_Dump) + mockLoader := NewMockUnit(pb.UnitType_Load) + mockSync := NewMockUnit(pb.UnitType_Sync) + return []unit.Unit{mockDumper, mockLoader, mockSync} + } + defer func() { + createUnits = createRealUnits + }() s := NewServer(cfg) go func() { @@ -106,18 +125,14 @@ func (t *testServer) TestServer(c *C) { return !s.closed.Get() }), IsTrue) dir := c.MkDir() - t.testOperateWorker(c, s, dir, true) - // check infos have be written into ETCD success. - t.testInfosInEtcd(c, hostName, cfg.AdvertiseAddr, dir) + t.testOperateWorker(c, s, dir, true) // check worker would retry connecting master rather than stop worker directly. ETCD = t.testRetryConnectMaster(c, s, ETCD, etcdDir, hostName) - mysqlCfg := &config.MysqlConfig{} - c.Assert(mysqlCfg.LoadFromFile("./dm-mysql.toml"), IsNil) - err = s.startWorker(mysqlCfg) - c.Assert(err, IsNil) + // resume contact with ETCD and start worker again + t.testOperateWorker(c, s, dir, true) // test condition hub t.testConidtionHub(c, s) @@ -129,37 +144,54 @@ func (t *testServer) TestServer(c *C) { cli := t.createClient(c, "127.0.0.1:8262") // start task - subtaskCfgBytes, err := ioutil.ReadFile("./subtask.toml") + subtaskCfg := config.SubTaskConfig{} + err = subtaskCfg.DecodeFile("./subtask.toml") c.Assert(err, IsNil) - resp1, err := cli.StartSubTask(context.Background(), &pb.StartSubTaskRequest{ - Task: string(subtaskCfgBytes), - }) + sourceCfg := loadSourceConfigWithoutPassword(c) + subtaskCfg.MydumperPath = "../../bin/mydumper" + _, err = ha.PutSubTaskCfg(s.etcdClient, subtaskCfg) c.Assert(err, IsNil) - c.Assert(resp1.Result, IsTrue) - - status, err := cli.QueryStatus(context.Background(), &pb.QueryStatusRequest{Name: "sub-task-name"}) + _, err = ha.PutSubTaskCfgStage(s.etcdClient, []config.SubTaskConfig{subtaskCfg}, + []ha.Stage{ha.NewSubTaskStage(pb.Stage_Running, sourceCfg.SourceID, subtaskCfg.Name)}) c.Assert(err, IsNil) - c.Assert(status.Result, IsTrue) - c.Assert(status.SubTaskStatus[0].Stage, Equals, pb.Stage_Paused) // because of `Access denied` - t.testSubTaskRecover(c, s, dir, hostName, string(subtaskCfgBytes)) + c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { + return checkSubTaskStatus(cli, pb.Stage_Running) + }), IsTrue) + + t.testSubTaskRecover(c, s, dir) - // update task - resp2, err := cli.UpdateSubTask(context.Background(), &pb.UpdateSubTaskRequest{ - Task: string(subtaskCfgBytes), - }) + // pause relay + _, err = ha.PutRelayStage(s.etcdClient, ha.NewRelayStage(pb.Stage_Paused, sourceCfg.SourceID)) c.Assert(err, IsNil) - c.Assert(resp2.Result, IsTrue) - - // operate task - resp3, err := cli.OperateSubTask(context.Background(), &pb.OperateSubTaskRequest{ - Name: "sub-task-name", - Op: pb.TaskOp_Pause, - }) + c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { + return checkRelayStatus(cli, pb.Stage_Paused) + }), IsTrue) + // resume relay + _, err = ha.PutRelayStage(s.etcdClient, ha.NewRelayStage(pb.Stage_Running, sourceCfg.SourceID)) c.Assert(err, IsNil) - c.Assert(resp3.Result, IsFalse) - c.Assert(resp3.Msg, Matches, ".*current stage is not running.*") + c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { + return checkRelayStatus(cli, pb.Stage_Running) + }), IsTrue) + // pause task + _, err = ha.PutSubTaskStage(s.etcdClient, ha.NewSubTaskStage(pb.Stage_Paused, sourceCfg.SourceID, subtaskCfg.Name)) + c.Assert(err, IsNil) + c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { + return checkSubTaskStatus(cli, pb.Stage_Paused) + }), IsTrue) + // resume task + _, err = ha.PutSubTaskStage(s.etcdClient, ha.NewSubTaskStage(pb.Stage_Running, sourceCfg.SourceID, subtaskCfg.Name)) + c.Assert(err, IsNil) + c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { + return checkSubTaskStatus(cli, pb.Stage_Running) + }), IsTrue) + // stop task + _, err = ha.DeleteSubTaskStage(s.etcdClient, ha.NewSubTaskStage(pb.Stage_Stopped, sourceCfg.SourceID, subtaskCfg.Name)) + c.Assert(err, IsNil) + c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { + return s.getWorker(true).subTaskHolder.findSubTask(subtaskCfg.Name) == nil + }), IsTrue) dupServer := NewServer(cfg) err = dupServer.Start() @@ -194,146 +226,73 @@ func (t *testServer) createClient(c *C, addr string) pb.WorkerClient { } func (t *testServer) testOperateWorker(c *C, s *Server, dir string, start bool) { - workerCfg := &config.MysqlConfig{} - err := workerCfg.LoadFromFile("./dm-mysql.toml") - c.Assert(err, IsNil) - workerCfg.RelayDir = dir - workerCfg.MetaDir = dir - cli := t.createClient(c, "127.0.0.1:8262") - task, err := workerCfg.Toml() - c.Assert(err, IsNil) - req := &pb.MysqlWorkerRequest{ - Op: pb.WorkerOp_UpdateConfig, - Config: task, - } + // load sourceCfg + sourceCfg := loadSourceConfigWithoutPassword(c) + sourceCfg.EnableRelay = true + sourceCfg.RelayDir = dir + sourceCfg.MetaDir = c.MkDir() + if start { - resp, err := cli.OperateMysqlWorker(context.Background(), req) - c.Assert(err, IsNil) - c.Assert(resp.Result, Equals, false) - c.Assert(resp.Msg, Matches, ".*worker has not started.*") - req.Op = pb.WorkerOp_StartWorker - resp, err = cli.OperateMysqlWorker(context.Background(), req) + // put mysql config into relative etcd key adapter to trigger operation event + _, err := ha.PutSourceCfg(s.etcdClient, sourceCfg) c.Assert(err, IsNil) - fmt.Println(resp.Msg) - c.Assert(resp.Result, Equals, true) - - req.Op = pb.WorkerOp_UpdateConfig - resp, err = cli.OperateMysqlWorker(context.Background(), req) + _, err = ha.PutRelayStageSourceBound(s.etcdClient, ha.NewRelayStage(pb.Stage_Running, sourceCfg.SourceID), + ha.NewSourceBound(sourceCfg.SourceID, s.cfg.Name)) c.Assert(err, IsNil) - c.Assert(resp.Result, Equals, true) - c.Assert(s.getWorker(true), NotNil) + // worker should be started and without error + c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { + w := s.getWorker(true) + return w != nil && w.closed.Get() == closedFalse + }), IsTrue) + c.Assert(s.getWorkerErr(true), IsNil) } else { - req.Op = pb.WorkerOp_StopWorker - resp, err := cli.OperateMysqlWorker(context.Background(), req) + // worker should be started before stopped + w := s.getWorker(true) + c.Assert(w, NotNil) + c.Assert(w.closed.Get() == closedFalse, IsTrue) + _, err := ha.DeleteSourceCfgRelayStageSourceBound(s.etcdClient, sourceCfg.SourceID, s.cfg.Name) c.Assert(err, IsNil) - c.Assert(resp.Result, Equals, true) + // worker should be started and without error + c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { + currentWorker := s.getWorker(true) + return currentWorker == nil && w.closed.Get() == closedTrue + }), IsTrue) + c.Assert(s.getWorkerErr(true), IsNil) } } -func (t *testServer) testInfosInEtcd(c *C, hostName string, workerAddr string, dir string) { - cli, err := clientv3.New(clientv3.Config{ - Endpoints: GetJoinURLs(hostName), - DialTimeout: dialTimeout, - DialKeepAliveTime: keepaliveTime, - DialKeepAliveTimeout: keepaliveTimeout, - }) - c.Assert(err, IsNil) - cfg := &config.MysqlConfig{} - err = cfg.LoadFromFile("./dm-mysql.toml") - cfg.RelayDir = dir - cfg.MetaDir = dir - - c.Assert(err, IsNil) - task, err := cfg.Toml() - c.Assert(err, IsNil) - - resp, err := cli.Get(context.Background(), common.UpstreamConfigKeyAdapter.Encode(cfg.SourceID)) - c.Assert(err, IsNil) - c.Assert(len(resp.Kvs), Equals, 1) - c.Assert(string(resp.Kvs[0].Value), Equals, task) - - resp, err = cli.Get(context.Background(), common.UpstreamBoundWorkerKeyAdapter.Encode(workerAddr)) - c.Assert(err, IsNil) - c.Assert(len(resp.Kvs), Equals, 1) - c.Assert(string(resp.Kvs[0].Value), Equals, cfg.SourceID) -} - func (t *testServer) testRetryConnectMaster(c *C, s *Server, ETCD *embed.Etcd, dir string, hostName string) *embed.Etcd { ETCD.Close() time.Sleep(4 * time.Second) - c.Assert(s.getWorker(true), NotNil) - // retryConnectMaster is false means that this worker has been tried to connect to master again. - c.Assert(s.retryConnectMaster.Get(), IsFalse) + // When worker server fail to keepalive with etcd, sever should close its worker + c.Assert(s.getWorker(true), IsNil) + c.Assert(s.getWorkerErr(true), IsNil) ETCD, err := createMockETCD(dir, "host://"+hostName) c.Assert(err, IsNil) time.Sleep(3 * time.Second) return ETCD } -func (t *testServer) testSubTaskRecover(c *C, s *Server, dir string, hostName string, subCfgStr string) { - cfg := &config.MysqlConfig{} - err := cfg.LoadFromFile("./dm-mysql.toml") - c.Assert(err, IsNil) - cfg.RelayDir = dir - cfg.MetaDir = dir - cli, err := clientv3.New(clientv3.Config{ - Endpoints: GetJoinURLs(hostName), - DialTimeout: dialTimeout, - DialKeepAliveTime: keepaliveTime, - DialKeepAliveTimeout: keepaliveTimeout, - }) - c.Assert(err, IsNil) - - subCfg := config.NewSubTaskConfig() - err = subCfg.Decode(subCfgStr) - c.Assert(err, IsNil) - c.Assert(cfg.SourceID, Equals, subCfg.SourceID) - - { - resp, err := cli.Get(context.Background(), common.UpstreamSubTaskKeyAdapter.Encode(cfg.SourceID), clientv3.WithPrefix()) - c.Assert(err, IsNil) - c.Assert(len(resp.Kvs), Equals, 1) - infos, err := common.UpstreamSubTaskKeyAdapter.Decode(string(resp.Kvs[0].Key)) - c.Assert(err, IsNil) - c.Assert(infos[1], Equals, subCfg.Name) - task := string(resp.Kvs[0].Value) - c.Assert(task, Equals, subCfgStr) - } - +func (t *testServer) testSubTaskRecover(c *C, s *Server, dir string) { workerCli := t.createClient(c, "127.0.0.1:8262") - mysqlTask, err := cfg.Toml() - c.Assert(err, IsNil) - req := &pb.MysqlWorkerRequest{ - Op: pb.WorkerOp_StopWorker, - Config: mysqlTask, - } - - resp, err := workerCli.OperateMysqlWorker(context.Background(), req) - c.Assert(err, IsNil) - c.Assert(resp.Result, Equals, true) + t.testOperateWorker(c, s, dir, false) status, err := workerCli.QueryStatus(context.Background(), &pb.QueryStatusRequest{Name: "sub-task-name"}) c.Assert(err, IsNil) c.Assert(status.Result, IsFalse) c.Assert(status.Msg, Equals, terror.ErrWorkerNoStart.Error()) - req.Op = pb.WorkerOp_StartWorker - resp, err = workerCli.OperateMysqlWorker(context.Background(), req) - c.Assert(err, IsNil) - c.Assert(resp.Result, Equals, true) - + t.testOperateWorker(c, s, dir, true) status, err = workerCli.QueryStatus(context.Background(), &pb.QueryStatusRequest{Name: "sub-task-name"}) c.Assert(err, IsNil) c.Assert(status.Result, IsTrue) - c.Assert(status.SubTaskStatus[0].Stage, Equals, pb.Stage_Paused) // because of `Access denied` + c.Assert(status.SubTaskStatus[0].Stage, Equals, pb.Stage_Running) } func (t *testServer) testStopWorkerWhenLostConnect(c *C, s *Server, ETCD *embed.Etcd) { - c.Assert(s.retryConnectMaster.Get(), IsTrue) ETCD.Close() time.Sleep(retryConnectSleepTime + time.Duration(defaultKeepAliveTTL+3)*time.Second) c.Assert(s.getWorker(true), IsNil) - c.Assert(s.retryConnectMaster.Get(), IsFalse) } func (t *testServer) TestGetMinPosInAllSubTasks(c *C) { @@ -372,3 +331,33 @@ func getFakePosForSubTask(ctx context.Context, subTaskCfg *config.SubTaskConfig) return nil, nil } } + +func checkSubTaskStatus(cli pb.WorkerClient, expect pb.Stage) bool { + status, err := cli.QueryStatus(context.Background(), &pb.QueryStatusRequest{Name: "sub-task-name"}) + if err != nil { + return false + } + if status.Result == false { + return false + } + return len(status.SubTaskStatus) > 0 && status.SubTaskStatus[0].Stage == expect +} + +func checkRelayStatus(cli pb.WorkerClient, expect pb.Stage) bool { + status, err := cli.QueryStatus(context.Background(), &pb.QueryStatusRequest{Name: "sub-task-name"}) + if err != nil { + return false + } + if status.Result == false { + return false + } + return status.RelayStatus.Stage == expect +} + +func loadSourceConfigWithoutPassword(c *C) config.SourceConfig { + sourceCfg := config.SourceConfig{} + err := sourceCfg.LoadFromFile(mysqlCfgDir) + c.Assert(err, IsNil) + sourceCfg.From.Password = "" // no password set + return sourceCfg +} diff --git a/dm/worker/dm-mysql.toml b/dm/worker/source.toml similarity index 100% rename from dm/worker/dm-mysql.toml rename to dm/worker/source.toml diff --git a/dm/worker/subtask.go b/dm/worker/subtask.go index 94f416a6d4..7720b98ccc 100644 --- a/dm/worker/subtask.go +++ b/dm/worker/subtask.go @@ -35,8 +35,12 @@ import ( "github.com/pingcap/dm/syncer" ) -// createUnits creates process units base on task mode -func createUnits(cfg *config.SubTaskConfig, etcdClient *clientv3.Client) []unit.Unit { +// createRealUnits is subtask units initializer +// it can be used for testing +var createUnits = createRealUnits + +// createRealUnits creates process units base on task mode +func createRealUnits(cfg *config.SubTaskConfig, etcdClient *clientv3.Client) []unit.Unit { failpoint.Inject("mockCreateUnitsDumpOnly", func(_ failpoint.Value) { log.L().Info("create mock worker units with dump unit only", zap.String("failpoint", "mockCreateUnitsDumpOnly")) failpoint.Return([]unit.Unit{mydumper.NewMydumper(cfg)}) @@ -83,8 +87,12 @@ type SubTask struct { etcdClient *clientv3.Client } -// NewSubTask creates a new SubTask -func NewSubTask(cfg *config.SubTaskConfig, etcdClient *clientv3.Client) *SubTask { +// NewSubTask is subtask initializer +// it can be used for testing +var NewSubTask = NewRealSubTask + +// NewRealSubTask creates a new SubTask +func NewRealSubTask(cfg *config.SubTaskConfig, etcdClient *clientv3.Client) *SubTask { return NewSubTaskWithStage(cfg, pb.Stage_New, etcdClient) } @@ -92,7 +100,6 @@ func NewSubTask(cfg *config.SubTaskConfig, etcdClient *clientv3.Client) *SubTask func NewSubTaskWithStage(cfg *config.SubTaskConfig, stage pb.Stage, etcdClient *clientv3.Client) *SubTask { st := SubTask{ cfg: cfg, - units: createUnits(cfg, etcdClient), stage: stage, l: log.With(zap.String("subtask", cfg.Name)), etcdClient: etcdClient, @@ -103,6 +110,12 @@ func NewSubTaskWithStage(cfg *config.SubTaskConfig, stage pb.Stage, etcdClient * // Init initializes the sub task processing units func (st *SubTask) Init() error { + cfgDecrypted, err := st.cfg.DecryptPassword() + if err != nil { + return err + } + st.cfg = cfgDecrypted + st.units = createUnits(st.cfg, st.etcdClient) if len(st.units) < 1 { return terror.ErrWorkerNoAvailUnits.Generate(st.cfg.Name, st.cfg.Mode) } diff --git a/dm/worker/subtask_test.go b/dm/worker/subtask_test.go index 6d0864e3f7..833a283ffd 100644 --- a/dm/worker/subtask_test.go +++ b/dm/worker/subtask_test.go @@ -27,6 +27,7 @@ import ( . "github.com/pingcap/check" "github.com/pingcap/errors" + "go.etcd.io/etcd/clientv3" ) type testSubTask struct{} @@ -115,7 +116,20 @@ func (m *MockUnit) Update(cfg *config.SubTaskConfig) error { return m.errUpdate } -func (m *MockUnit) Status() interface{} { return nil } +func (m *MockUnit) Status() interface{} { + switch m.typ { + case pb.UnitType_Check: + return &pb.CheckStatus{} + case pb.UnitType_Dump: + return &pb.DumpStatus{} + case pb.UnitType_Load: + return &pb.LoadStatus{} + case pb.UnitType_Sync: + return &pb.SyncStatus{} + default: + return struct{}{} + } +} func (m *MockUnit) Error() interface{} { return nil } @@ -152,14 +166,21 @@ func (t *testSubTask) TestSubTaskNormalUsage(c *C) { c.Assert(st.Stage(), DeepEquals, pb.Stage_New) // test empty and fail - st.units = nil + defer func() { + createUnits = createRealUnits + }() + createUnits = func(cfg *config.SubTaskConfig, etcdClient *clientv3.Client) []unit.Unit { + return nil + } st.Run() c.Assert(st.Stage(), Equals, pb.Stage_Paused) c.Assert(strings.Contains(st.Result().Errors[0].Msg, "has no dm units for mode"), IsTrue) mockDumper := NewMockUnit(pb.UnitType_Dump) mockLoader := NewMockUnit(pb.UnitType_Load) - st.units = []unit.Unit{mockDumper, mockLoader} + createUnits = func(cfg *config.SubTaskConfig, etcdClient *clientv3.Client) []unit.Unit { + return []unit.Unit{mockDumper, mockLoader} + } st.Run() c.Assert(st.Stage(), Equals, pb.Stage_Running) @@ -266,7 +287,12 @@ func (t *testSubTask) TestPauseAndResumeSubtask(c *C) { mockDumper := NewMockUnit(pb.UnitType_Dump) mockLoader := NewMockUnit(pb.UnitType_Load) - st.units = []unit.Unit{mockDumper, mockLoader} + defer func() { + createUnits = createRealUnits + }() + createUnits = func(cfg *config.SubTaskConfig, etcdClient *clientv3.Client) []unit.Unit { + return []unit.Unit{mockDumper, mockLoader} + } st.Run() c.Assert(st.Stage(), Equals, pb.Stage_Running) @@ -403,7 +429,12 @@ func (t *testSubTask) TestSubtaskWithStage(c *C) { mockDumper := NewMockUnit(pb.UnitType_Dump) mockLoader := NewMockUnit(pb.UnitType_Load) - st.units = []unit.Unit{mockDumper, mockLoader} + defer func() { + createUnits = createRealUnits + }() + createUnits = func(cfg *config.SubTaskConfig, etcdClient *clientv3.Client) []unit.Unit { + return []unit.Unit{mockDumper, mockLoader} + } // pause c.Assert(st.Pause(), NotNil) @@ -427,7 +458,9 @@ func (t *testSubTask) TestSubtaskWithStage(c *C) { st = NewSubTaskWithStage(cfg, pb.Stage_Finished, nil) c.Assert(st.Stage(), DeepEquals, pb.Stage_Finished) - st.units = []unit.Unit{mockDumper, mockLoader} + createUnits = func(cfg *config.SubTaskConfig, etcdClient *clientv3.Client) []unit.Unit { + return []unit.Unit{mockDumper, mockLoader} + } // close again st.Close() diff --git a/dm/worker/task_checker.go b/dm/worker/task_checker.go index 7232f7748b..3b9dd3c64c 100644 --- a/dm/worker/task_checker.go +++ b/dm/worker/task_checker.go @@ -17,7 +17,6 @@ import ( "context" "encoding/json" "fmt" - "github.com/pingcap/dm/dm/config" "strings" "sync" "time" @@ -26,6 +25,7 @@ import ( "github.com/siddontang/go/sync2" "go.uber.org/zap" + "github.com/pingcap/dm/dm/config" "github.com/pingcap/dm/dm/pb" "github.com/pingcap/dm/pkg/backoff" "github.com/pingcap/dm/pkg/log" diff --git a/dm/worker/task_checker_test.go b/dm/worker/task_checker_test.go index 9541666409..6df94c5733 100644 --- a/dm/worker/task_checker_test.go +++ b/dm/worker/task_checker_test.go @@ -87,11 +87,10 @@ func (s *testTaskCheckerSuite) TestCheck(c *check.C) { NewRelayHolder = NewDummyRelayHolder dir := c.MkDir() - cfg := &config.MysqlConfig{} - c.Assert(cfg.LoadFromFile("./dm-mysql.toml"), check.IsNil) + cfg := loadSourceConfigWithoutPassword(c) cfg.RelayDir = dir cfg.MetaDir = dir - w, err := NewWorker(cfg, nil) + w, err := NewWorker(&cfg, nil) c.Assert(err, check.IsNil) w.closed.Set(closedFalse) @@ -205,12 +204,10 @@ func (s *testTaskCheckerSuite) TestCheckTaskIndependent(c *check.C) { NewRelayHolder = NewDummyRelayHolder dir := c.MkDir() - // cfg := NewConfig() - cfg := &config.MysqlConfig{} - c.Assert(cfg.LoadFromFile("./dm-mysql.toml"), check.IsNil) + cfg := loadSourceConfigWithoutPassword(c) cfg.RelayDir = dir cfg.MetaDir = dir - w, err := NewWorker(cfg, nil) + w, err := NewWorker(&cfg, nil) c.Assert(err, check.IsNil) w.closed.Set(closedFalse) diff --git a/dm/worker/worker.go b/dm/worker/worker.go index 81a2b56ded..b1b70ee390 100644 --- a/dm/worker/worker.go +++ b/dm/worker/worker.go @@ -16,6 +16,8 @@ package worker import ( "context" "fmt" + "os" + "path/filepath" "sync" "time" @@ -26,6 +28,7 @@ import ( "github.com/pingcap/dm/dm/config" "github.com/pingcap/dm/dm/pb" + "github.com/pingcap/dm/pkg/ha" "github.com/pingcap/dm/pkg/log" "github.com/pingcap/dm/pkg/terror" "github.com/pingcap/dm/pkg/tracing" @@ -49,7 +52,7 @@ type Worker struct { ctx context.Context cancel context.CancelFunc - cfg *config.MysqlConfig + cfg *config.SourceConfig l log.Logger subTaskHolder *subTaskHolder @@ -66,7 +69,7 @@ type Worker struct { } // NewWorker creates a new Worker -func NewWorker(cfg *config.MysqlConfig, etcdClient *clientv3.Client) (w *Worker, err error) { +func NewWorker(cfg *config.SourceConfig, etcdClient *clientv3.Client) (w *Worker, err error) { w = &Worker{ cfg: cfg, tracer: tracing.InitTracerHub(cfg.Tracer), @@ -115,9 +118,10 @@ func NewWorker(cfg *config.MysqlConfig, etcdClient *clientv3.Client) (w *Worker, } // Start starts working -func (w *Worker) Start() { +func (w *Worker) Start(startRelay bool) { - if w.cfg.EnableRelay { + if w.cfg.EnableRelay && startRelay { + log.L().Info("relay is started") // start relay w.relayHolder.Start() @@ -196,34 +200,28 @@ func (w *Worker) Close() { } // StartSubTask creates a sub task an run it -func (w *Worker) StartSubTask(cfg *config.SubTaskConfig) error { +func (w *Worker) StartSubTask(cfg *config.SubTaskConfig) { w.Lock() defer w.Unlock() + // copy some config item from dm-worker's config + w.copyConfigFromWorker(cfg) + // directly put cfg into subTaskHolder + // the unique of subtask should be assured by etcd + st := NewSubTask(cfg, w.etcdClient) + w.subTaskHolder.recordSubTask(st) if w.closed.Get() == closedTrue { - return terror.ErrWorkerAlreadyClosed.Generate() + st.fail(terror.ErrWorkerAlreadyClosed.Generate()) + return } if w.relayPurger != nil && w.relayPurger.Purging() { - return terror.ErrWorkerRelayIsPurging.Generate(cfg.Name) - } - - if w.subTaskHolder.findSubTask(cfg.Name) != nil { - return terror.ErrWorkerSubTaskExists.Generate(cfg.Name) - } - - // copy some config item from dm-worker's config - w.copyConfigFromWorker(cfg) - cfgDecrypted, err := cfg.DecryptPassword() - if err != nil { - return terror.WithClass(err, terror.ClassDMWorker) + st.fail(terror.ErrWorkerRelayIsPurging.Generate(cfg.Name)) + return } - w.l.Info("started sub task", zap.Stringer("config", cfgDecrypted)) - st := NewSubTask(cfgDecrypted, w.etcdClient) - w.subTaskHolder.recordSubTask(st) + w.l.Info("started sub task", zap.Stringer("config", cfg)) st.Run() - return nil } // UpdateSubTask update config for a sub task @@ -306,6 +304,113 @@ func (w *Worker) QueryError(name string) []*pb.SubTaskError { return w.Error(name) } +// purgeRelayDir will clear all contents under w.cfg.RelayDir +func (w *Worker) purgeRelayDir() error { + if !w.cfg.EnableRelay { + return nil + } + dir := w.cfg.RelayDir + d, err := os.Open(dir) + // fail to open dir, return directly + if err != nil { + if err == os.ErrNotExist { + return nil + } + return err + } + defer d.Close() + names, err := d.Readdirnames(-1) + if err != nil { + return err + } + for _, name := range names { + err = os.RemoveAll(filepath.Join(dir, name)) + if err != nil { + return err + } + } + log.L().Info("relay dir is purged to be ready for new relay log", zap.String("relayDir", dir)) + return nil +} + +func (w *Worker) handleSubTaskStage(ctx context.Context, stageCh chan ha.Stage, errCh chan error) { + for { + select { + case <-ctx.Done(): + log.L().Info("worker is closed, handleSubTaskStage will quit now") + return + case stage := <-stageCh: + err := w.operateSubTaskStage(stage) + if err != nil { + // TODO: add better metrics + log.L().Error("fail to operate subtask stage", zap.Stringer("stage", stage), zap.Error(err)) + } + case err := <-errCh: + // TODO: deal with err + log.L().Error("WatchSubTaskStage received an error", zap.Error(err)) + } + } +} + +func (w *Worker) operateSubTaskStage(stage ha.Stage) error { + var op pb.TaskOp + switch { + case stage.Expect == pb.Stage_Running: + if st := w.subTaskHolder.findSubTask(stage.Task); st == nil { + tsm, _, err := ha.GetSubTaskCfg(w.etcdClient, stage.Source, stage.Task, stage.Revision) + if err != nil { + // TODO: need retry + return terror.Annotate(err, "fail to get subtask config from etcd") + } + subTaskCfg := tsm[stage.Task] + w.StartSubTask(&subTaskCfg) + return nil + } + op = pb.TaskOp_Resume + case stage.Expect == pb.Stage_Paused: + op = pb.TaskOp_Pause + case stage.IsDeleted: + op = pb.TaskOp_Stop + } + return w.OperateSubTask(stage.Task, op) +} + +func (w *Worker) handleRelayStage(ctx context.Context, stageCh chan ha.Stage, errCh chan error) { + for { + select { + case <-ctx.Done(): + log.L().Info("worker is closed, handleRelayStage will quit now") + return + case stage := <-stageCh: + err := w.operateRelayStage(ctx, stage) + if err != nil { + // TODO: add better metrics + log.L().Error("fail to operate relay", zap.Stringer("stage", stage), zap.Error(err)) + } + case err := <-errCh: + log.L().Error("WatchRelayStage received an error", zap.Error(err)) + } + } +} + +func (w *Worker) operateRelayStage(ctx context.Context, stage ha.Stage) error { + var op pb.RelayOp + switch { + case stage.Expect == pb.Stage_Running: + if w.relayHolder.Stage() == pb.Stage_New { + w.relayHolder.Start() + w.relayPurger.Start() + return nil + } + op = pb.RelayOp_ResumeRelay + case stage.Expect == pb.Stage_Paused: + op = pb.RelayOp_PauseRelay + case stage.IsDeleted: + op = pb.RelayOp_StopRelay + } + return w.OperateRelay(ctx, &pb.OperateRelayRequest{Op: op}) +} + // HandleSQLs implements Handler.HandleSQLs. func (w *Worker) HandleSQLs(ctx context.Context, req *pb.HandleSubTaskSQLsRequest) error { if w.closed.Get() == closedTrue { @@ -381,7 +486,7 @@ func (w *Worker) ForbidPurge() (bool, string) { } // QueryConfig returns worker's config -func (w *Worker) QueryConfig(ctx context.Context) (*config.MysqlConfig, error) { +func (w *Worker) QueryConfig(ctx context.Context) (*config.SourceConfig, error) { w.RLock() defer w.RUnlock() @@ -420,7 +525,7 @@ func (w *Worker) UpdateRelayConfig(ctx context.Context, content string) error { } // No need to store config in local - newCfg := &config.MysqlConfig{} + newCfg := &config.SourceConfig{} err := newCfg.Parse(content) if err != nil { diff --git a/dm/worker/worker_test.go b/dm/worker/worker_test.go index 28ae8002dd..367458b78c 100644 --- a/dm/worker/worker_test.go +++ b/dm/worker/worker_test.go @@ -17,6 +17,8 @@ import ( "context" "fmt" "io/ioutil" + "os" + "path/filepath" "strings" "time" @@ -31,8 +33,7 @@ import ( var emptyWorkerStatusInfoJSONLength = 25 func (t *testServer) testWorker(c *C) { - cfg := &config.MysqlConfig{} - c.Assert(cfg.LoadFromFile("./dm-mysql.toml"), IsNil) + cfg := loadSourceConfigWithoutPassword(c) dir := c.MkDir() cfg.EnableRelay = true @@ -44,11 +45,11 @@ func (t *testServer) testWorker(c *C) { NewRelayHolder = NewRealRelayHolder }() - _, err := NewWorker(cfg, nil) + _, err := NewWorker(&cfg, nil) c.Assert(err, ErrorMatches, "init error") NewRelayHolder = NewDummyRelayHolder - w, err := NewWorker(cfg, nil) + w, err := NewWorker(&cfg, nil) c.Assert(err, IsNil) c.Assert(w.StatusJSON(""), HasLen, emptyWorkerStatusInfoJSONLength) //c.Assert(w.closed.Get(), Equals, closedFalse) @@ -65,11 +66,12 @@ func (t *testServer) testWorker(c *C) { c.Assert(w.subTaskHolder.getAllSubTasks(), HasLen, 0) c.Assert(w.closed.Get(), Equals, closedTrue) - err = w.StartSubTask(&config.SubTaskConfig{ + w.StartSubTask(&config.SubTaskConfig{ Name: "testStartTask", }) - c.Assert(err, NotNil) - c.Assert(err, ErrorMatches, ".*worker already closed.*") + task := w.subTaskHolder.findSubTask("testStartTask") + c.Assert(task, NotNil) + c.Assert(task.Result().String(), Matches, ".*worker already closed.*") err = w.UpdateSubTask(&config.SubTaskConfig{ Name: "testStartTask", @@ -92,20 +94,19 @@ func (t *testServer) TestTaskAutoResume(c *C) { defer ETCD.Close() cfg := NewConfig() - workerCfg := config.NewMysqlConfig() - workerCfg.LoadFromFile("./dm-mysql.toml") + sourceConfig := loadSourceConfigWithoutPassword(c) c.Assert(cfg.Parse([]string{"-config=./dm-worker.toml"}), IsNil) - workerCfg.Checker.CheckInterval = config.Duration{Duration: 40 * time.Millisecond} - workerCfg.Checker.BackoffMin = config.Duration{Duration: 20 * time.Millisecond} - workerCfg.Checker.BackoffMax = config.Duration{Duration: 1 * time.Second} - workerCfg.From.Password = "" // no password set + sourceConfig.Checker.CheckEnable = true + sourceConfig.Checker.CheckInterval = config.Duration{Duration: 40 * time.Millisecond} + sourceConfig.Checker.BackoffMin = config.Duration{Duration: 20 * time.Millisecond} + sourceConfig.Checker.BackoffMax = config.Duration{Duration: 1 * time.Second} cfg.WorkerAddr = fmt.Sprintf(":%d", port) dir := c.MkDir() - workerCfg.RelayDir = dir - workerCfg.MetaDir = dir - workerCfg.EnableRelay = true + sourceConfig.RelayDir = dir + sourceConfig.MetaDir = dir + sourceConfig.EnableRelay = true NewRelayHolder = NewDummyRelayHolder defer func() { @@ -131,7 +132,7 @@ func (t *testServer) TestTaskAutoResume(c *C) { if s.closed.Get() { return false } - c.Assert(s.startWorker(workerCfg), IsNil) + c.Assert(s.startWorker(&sourceConfig), IsNil) return true }), IsTrue) // start task @@ -171,3 +172,28 @@ func (t *testServer) TestTaskAutoResume(c *C) { return false }), IsTrue) } + +func (t *testServer) TestPurgeRelayDir(c *C) { + cfg := loadSourceConfigWithoutPassword(c) + cfg.EnableRelay = true + dir := c.MkDir() + cfg.RelayDir = dir + + dirs := filepath.Join(dir, `f889521f-e994-11e9-94e9-0242ac110002.000001`) + c.Assert(os.MkdirAll(dirs, 0777), IsNil) + file := filepath.Join(dir, `server-uuid.index`) + f, err := os.Create(file) + c.Assert(err, IsNil) + c.Assert(f.Close(), IsNil) + file = filepath.Join(dirs, `relay.meta`) + f, err = os.Create(file) + c.Assert(err, IsNil) + c.Assert(f.Close(), IsNil) + + w, err := NewWorker(&cfg, nil) + c.Assert(err, IsNil) + c.Assert(w.purgeRelayDir(), IsNil) + files, err := ioutil.ReadDir(cfg.RelayDir) + c.Assert(err, IsNil) + c.Assert(files, HasLen, 0) +} diff --git a/pkg/etcdutil/etcdutil.go b/pkg/etcdutil/etcdutil.go index c94b5da07f..841dcdf5b4 100644 --- a/pkg/etcdutil/etcdutil.go +++ b/pkg/etcdutil/etcdutil.go @@ -52,3 +52,15 @@ func AddMember(client *clientv3.Client, peerAddrs []string) (*clientv3.MemberAdd defer cancel() return client.MemberAdd(ctx, peerAddrs) } + +// DoOpsInOneTxn do multiple etcd operations in one txn. +func DoOpsInOneTxn(cli *clientv3.Client, ops ...clientv3.Op) (int64, error) { + ctx, cancel := context.WithTimeout(cli.Ctx(), DefaultRequestTimeout) + defer cancel() + + resp, err := cli.Txn(ctx).Then(ops...).Commit() + if err != nil { + return 0, err + } + return resp.Header.Revision, nil +} diff --git a/pkg/ha/bound.go b/pkg/ha/bound.go new file mode 100644 index 0000000000..c32b7b1a5e --- /dev/null +++ b/pkg/ha/bound.go @@ -0,0 +1,215 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package ha + +import ( + "context" + "encoding/json" + "fmt" + + "go.etcd.io/etcd/clientv3" + "go.etcd.io/etcd/mvcc/mvccpb" + "go.uber.org/zap" + + "github.com/pingcap/dm/dm/common" + "github.com/pingcap/dm/pkg/etcdutil" + "github.com/pingcap/dm/pkg/log" +) + +// SourceBound represents the bound relationship between the DM-worker instance and the upstream MySQL source. +type SourceBound struct { + Source string `json:"source"` // the source ID of the upstream. + Worker string `json:"worker"` // the name of the bounded DM-worker for the source. + + // only used to report to the caller of the watcher, do not marsh it. + // if it's true, it means the bound has been deleted in etcd. + IsDeleted bool `json:"-"` + // only has value in watcher, will get 0 in GetSourceBound + // record the etcd revision right after putting this SourceBound + Revision int64 `json:"-"` +} + +// NewSourceBound creates a new SourceBound instance. +func NewSourceBound(source, worker string) SourceBound { + return SourceBound{ + Source: source, + Worker: worker, + } +} + +// String implements Stringer interface. +func (b SourceBound) String() string { + s, _ := b.toJSON() + return s +} + +// toJSON returns the string of JSON represent. +func (b SourceBound) toJSON() (string, error) { + data, err := json.Marshal(b) + if err != nil { + return "", err + } + return string(data), nil +} + +// sourceBoundFromJSON constructs SourceBound from its JSON represent. +func sourceBoundFromJSON(s string) (b SourceBound, err error) { + err = json.Unmarshal([]byte(s), &b) + return +} + +// PutSourceBound puts the bound relationship into etcd. +// k/v: worker-name -> bound relationship. +func PutSourceBound(cli *clientv3.Client, bound SourceBound) (int64, error) { + op, err := putSourceBoundOp(bound) + if err != nil { + return 0, err + } + + return etcdutil.DoOpsInOneTxn(cli, op) +} + +// DeleteSourceBound deletes the bound relationship in etcd for the specified worker. +func DeleteSourceBound(cli *clientv3.Client, worker string) (int64, error) { + op := deleteSourceBoundOp(worker) + return etcdutil.DoOpsInOneTxn(cli, op) +} + +// GetSourceBound gets the source bound relationship for the specified DM-worker. +// if the bound relationship for the worker name not exist, return with `err == nil` and `revision == 0`. +// if the worker name is "", it will return all bound relationships as a map{worker-name: bound}. +// if the worker name is given, it will return a map{worker-name: bound} whose length is 1. +func GetSourceBound(cli *clientv3.Client, worker string) (map[string]SourceBound, int64, error) { + ctx, cancel := context.WithTimeout(cli.Ctx(), etcdutil.DefaultRequestTimeout) + defer cancel() + + var ( + sbm = make(map[string]SourceBound) + resp *clientv3.GetResponse + err error + ) + if worker != "" { + resp, err = cli.Get(ctx, common.UpstreamBoundWorkerKeyAdapter.Encode(worker)) + } else { + resp, err = cli.Get(ctx, common.UpstreamBoundWorkerKeyAdapter.Path(), clientv3.WithPrefix()) + } + + if err != nil { + return sbm, 0, err + } + + if resp.Count == 0 { + return sbm, 0, nil + } else if worker != "" && resp.Count > 1 { + // TODO(csuzhangxc): add terror. + // this should not happen. + return sbm, 0, fmt.Errorf("too many bound relationship (%d) exist for the DM-worker %s", resp.Count, worker) + } + + for _, kvs := range resp.Kvs { + bound, err2 := sourceBoundFromJSON(string(kvs.Value)) + if err2 != nil { + return sbm, 0, err2 + } + sbm[bound.Worker] = bound + } + + return sbm, resp.Header.Revision, nil +} + +// WatchSourceBound watches PUT & DELETE operations for the bound relationship of the specified DM-worker. +// For the DELETE operations, it returns an empty bound relationship. +func WatchSourceBound(ctx context.Context, cli *clientv3.Client, + worker string, revision int64, outCh chan<- SourceBound, errCh chan<- error) { + ch := cli.Watch(ctx, common.UpstreamBoundWorkerKeyAdapter.Encode(worker), clientv3.WithRev(revision)) + + for { + select { + case <-ctx.Done(): + return + case resp := <-ch: + if resp.Canceled { + // TODO(csuzhangxc): do retry here. + if resp.Err() != nil { + select { + case errCh <- resp.Err(): + case <-ctx.Done(): + } + } + return + } + + for _, ev := range resp.Events { + var ( + bound SourceBound + err error + ) + switch ev.Type { + case mvccpb.PUT: + bound, err = sourceBoundFromJSON(string(ev.Kv.Value)) + case mvccpb.DELETE: + bound, err = sourceBoundFromKey(string(ev.Kv.Key)) + bound.IsDeleted = true + default: + // this should not happen. + log.L().Error("unsupported etcd event type", zap.Reflect("kv", ev.Kv), zap.Reflect("type", ev.Type)) + continue + } + bound.Revision = ev.Kv.ModRevision + + if err != nil { + select { + case errCh <- err: + case <-ctx.Done(): + return + } + } else { + select { + case outCh <- bound: + case <-ctx.Done(): + return + } + } + } + } + } +} + +// sourceBoundFromKey constructs an incomplete bound relationship from an etcd key. +func sourceBoundFromKey(key string) (SourceBound, error) { + var bound SourceBound + ks, err := common.UpstreamBoundWorkerKeyAdapter.Decode(key) + if err != nil { + return bound, err + } + bound.Worker = ks[0] + return bound, nil +} + +// deleteSourceBoundOp returns a DELETE ectd operation for the bound relationship of the specified DM-worker. +func deleteSourceBoundOp(worker string) clientv3.Op { + return clientv3.OpDelete(common.UpstreamBoundWorkerKeyAdapter.Encode(worker)) +} + +// putSourceBoundOp returns a PUT etcd operation for the bound relationship. +// k/v: worker-name -> bound relationship. +func putSourceBoundOp(bound SourceBound) (clientv3.Op, error) { + value, err := bound.toJSON() + if err != nil { + return clientv3.Op{}, err + } + key := common.UpstreamBoundWorkerKeyAdapter.Encode(bound.Worker) + + return clientv3.OpPut(key, value), nil +} diff --git a/pkg/ha/bound_test.go b/pkg/ha/bound_test.go new file mode 100644 index 0000000000..f9bfbfd67b --- /dev/null +++ b/pkg/ha/bound_test.go @@ -0,0 +1,121 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package ha + +import ( + "context" + "time" + + . "github.com/pingcap/check" +) + +func (t *testForEtcd) TestSourceBoundJSON(c *C) { + b1 := NewSourceBound("mysql-replica-1", "dm-worker-1") + + j, err := b1.toJSON() + c.Assert(err, IsNil) + c.Assert(j, Equals, `{"source":"mysql-replica-1","worker":"dm-worker-1"}`) + c.Assert(j, Equals, b1.String()) + + b2, err := sourceBoundFromJSON(j) + c.Assert(err, IsNil) + c.Assert(b2, DeepEquals, b1) +} + +func (t *testForEtcd) TestSourceBoundEtcd(c *C) { + defer clearTestInfoOperation(c) + + var ( + watchTimeout = 500 * time.Millisecond + worker1 = "dm-worker-1" + worker2 = "dm-worker-2" + bound1 = NewSourceBound("mysql-replica-1", worker1) + bound2 = NewSourceBound("mysql-replica-2", worker2) + ) + c.Assert(bound1.IsDeleted, IsFalse) + + // no bound exists. + sbm1, rev1, err := GetSourceBound(etcdTestCli, "") + c.Assert(err, IsNil) + c.Assert(rev1, Equals, int64(0)) + c.Assert(sbm1, HasLen, 0) + + // put two bounds. + rev2, err := PutSourceBound(etcdTestCli, bound1) + c.Assert(err, IsNil) + c.Assert(rev2, Greater, rev1) + rev3, err := PutSourceBound(etcdTestCli, bound2) + c.Assert(err, IsNil) + c.Assert(rev3, Greater, rev2) + + // watch the PUT operation for the bound1. + boundCh := make(chan SourceBound, 10) + errCh := make(chan error, 10) + ctx, cancel := context.WithTimeout(context.Background(), watchTimeout) + WatchSourceBound(ctx, etcdTestCli, worker1, rev2, boundCh, errCh) + cancel() + close(boundCh) + close(errCh) + c.Assert(len(boundCh), Equals, 1) + bound1.Revision = rev2 + c.Assert(<-boundCh, DeepEquals, bound1) + c.Assert(len(errCh), Equals, 0) + + // get bound1 back. + bound1.Revision = 0 + sbm2, rev4, err := GetSourceBound(etcdTestCli, worker1) + c.Assert(err, IsNil) + c.Assert(rev4, Equals, rev3) + c.Assert(sbm2, HasLen, 1) + c.Assert(sbm2[worker1], DeepEquals, bound1) + + // get bound1 and bound2 back. + bound2.Revision = 0 + sbm2, rev4, err = GetSourceBound(etcdTestCli, "") + c.Assert(err, IsNil) + c.Assert(rev4, Equals, rev3) + c.Assert(sbm2, HasLen, 2) + c.Assert(sbm2[worker1], DeepEquals, bound1) + c.Assert(sbm2[worker2], DeepEquals, bound2) + + // delete bound1. + rev5, err := DeleteSourceBound(etcdTestCli, worker1) + c.Assert(err, IsNil) + c.Assert(rev5, Greater, rev4) + + // delete bound2. + rev6, err := DeleteSourceBound(etcdTestCli, worker2) + c.Assert(err, IsNil) + c.Assert(rev6, Greater, rev5) + + // watch the DELETE operation for bound1. + boundCh = make(chan SourceBound, 10) + errCh = make(chan error, 10) + ctx, cancel = context.WithTimeout(context.Background(), watchTimeout) + WatchSourceBound(ctx, etcdTestCli, worker1, rev5, boundCh, errCh) + cancel() + close(boundCh) + close(errCh) + c.Assert(len(boundCh), Equals, 1) + bo := <-boundCh + c.Assert(bo.IsDeleted, IsTrue) + c.Assert(bo.Revision, Equals, rev5) + c.Assert(len(errCh), Equals, 0) + + // get again, bound1 not exists now. + sbm3, rev6, err := GetSourceBound(etcdTestCli, worker1) + c.Assert(err, IsNil) + c.Assert(rev6, Equals, int64(0)) + c.Assert(sbm3, HasLen, 0) +} diff --git a/pkg/ha/doc.go b/pkg/ha/doc.go new file mode 100644 index 0000000000..a69e4b6ff7 --- /dev/null +++ b/pkg/ha/doc.go @@ -0,0 +1,149 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package ha + +// Data need to be persisted for the HA scheduler. +// - configuration: +// - the upstream MySQL config (content of `SourceConfig`): +// - PUT when adding an upstream (`operate-mysql create`) by DM-master. +// - verify the validation before PUT it into etcd. +// - GET when scheduling the source to a DM-worker instance by DM-worker. +// - DELETE when removing an upstream (`operate-mysql stop`) by DM-master. +// - DELETE with `the expectant stage of the relay` in one txn. +// - DELETE with `the bound relationship between the DM-worker instance and the upstream MySQL source` in one txn. +// - TODO: UPDATE support with `the expectant stage of the relay`. +// - the data migration subtask config (content of `SubTaskConfig`): +// - PUT when starting a task (`start-task`) by DM-master. +// - verify the validation before PUT it into etcd. +// - PUT with `the expectant stage of the subtask` in one txn. +// - GET when starting a subtask by DM-worker. +// - DELETE when stopping a task (`stop-task`) by DM-master. +// - DELETE with `the expectant stage of the subtask` in one txn. +// - TODO: UPDATE support with `the expectant stage of the subtask`. +// +// - node information (name, address, etc.): +// - the DM-worker instance: +// - PUT when adding a DM-worker instance by DM-master. +// - GET only when restoring the in-memory information after the leader of DM-master changed by the new leader. +// - DELETE when removing a DM-worker instance by DM-master. +// - TODO: UPDATE support later. +// +// - the health status (or keep-alive) of component instances: +// - the DM-worker instance: +// - PUT (keep-alive) by DM-worker (when the node is healthy). +// - GET (through WATCH) by DM-master to know if another schedule needed. +// - DELETE (when the lease is timeout) by etcd (when the node is un-healthy). +// - no need to UPDATE it manually. +// +// - the running stage: +// - NOTE: persist the current stage of the relay and subtask if needed later. +// - the bound relationship between the DM-worker instance and the upstream MySQL source (including relevant relay and subtasks): +// - PUT when scheduling the source to a DM-worker instance by DM-master. +// - PUT with `the expectant stage of the relay` in one txn for the first time. +// - GET (through GET/WATCH) by DM-worker to know relevant relay/subtasks have to do. +// - DELETE when the bounded DM-worker become offline. +// - DELETE when removing an upstream by DM-master. +// - DELETE with `the upstream MySQL config` in one txn. +// - DELETE with `the expectant stage of the relay` in one txn. +// - UPDATE when scheduling the source to another DM-worker instance by DM-master. +// - the expectant stage of the relay: +// - PUT when scheduling the source to a DM-worker instance by DM-master. +// - PUT with `the bound relationship between the DM-worker instance and the upstream MySQL source` in one txn for the first time. +// - GET (through GET/WATCH) by DM-worker to know how to update the current stage. +// - UPDATE when handling the user request (pause-relay/resume-relay) by DM-master. +// - DELETE when removing an upstream by DM-master. +// - DELETE with `the upstream MySQL config` in one txn. +// - DELETE with `the bound relationship between the DM-worker instance and the upstream MySQL source` in one txn. +// - the expectant stage of the subtask: +// - PUT/DELETE/UPDATE when handling the user request (start-task/stop-task/pause-task/resume-task) by DM-master. +// - GET (through GET/WATCH) by DM-worker to know how to update the current stage. +// +// The summary of the above: +// - only the DM-master WRITE schedule operations +// - NOTE: the DM-worker WRITE (PUT) its information and health status. +// - the DM-worker READ schedule operations and obey them. +// In other words, behaviors of the cluster are clear, that are decisions made by the DM-master. +// As long as the DM-worker can connect to the cluster, it must obey these decisions. +// If the DM-worker can't connect to the cluster, it must shutdown all operations. +// +// In this model, we use etcd as the command queue for communication between the DM-master and DM-worker instead of gRPC. +// +// One example of the workflow: +// 0. the user starts the DM-master cluster, and GET all history persisted data described above. +// - restore the in-memory status. +// 1. the user starts a DM-worker instance. +// - PUT DM-worker instance information into etcd. +// 2. DM-master GET the information of the DM-worker instance, and mark it as `free` status. +// 3. the user adds an upstream config. +// - PUT the config of the upstream into etcd. +// 4. DM-master schedules the upstream relevant operations to the free DM-worker. +// - PUT the bound relationship. +// - PUT the expectant stage of the relay if not exists. +// 5. DM-worker GET the bound relationship, the config of the upstream and the expectant stage of the relay. +// 6. DM-worker obey the expectant stage of the relay. +// - start relay (if error occurred, wait for the user to resolve it and do not re-schedule it to other DM-worker instances). +// 7. the user starts a data migration task. +// 8. DM-master PUT the data migration task config and the expectant stage of subtasks into etcd. +// 9. DM-worker GET the config of the subtask, the expectant stage of the subtask. +// 10. DM-worker obey the expectant stage of the subtask +// - start the subtask (if error occurred, wait for the user to resolve it). +// 11. the task keeps running for a period. +// 12. the user pauses the task. +// 13. DM-master PUT the expectant stage of the subtask. +// 14. DM-worker obey the expectant stage of the subtask. +// 15. the user resumes the task (DM-master and DM-worker handle it similar to pause the task). +// 16. the user stops the task. +// 17. DM-master DELETE the data migration task config and the expectant stage of subtasks in etcd. +// - DELETE the information before subtasks shutdown. +// 18. DM-worker stops the subtask. +// - NOTE: DM-worker should always stop the subtask if the expectant stage of the subtask is missing. +// 19. the relay of the DM-worker continues to run. +// 20. the user remove the upstream config. +// 21. DM-master DELETE the upstream MySQL config, the bound relationship and the expectant stage of the relay. +// 22. DM-worker shutdown. +// 23. the user marks the DM-worker as offline. +// - DELETE DM-worker instance information in etcd. +// +// when the DM-worker (with relay and subtasks) is down: +// 0. the status of the old DM-worker is un-health (keep-alive failed). +// 1. DM-master choose another DM-worker instance for failover. +// - DM-master can only schedule the source to another new DM-worker only after the old DM-worker shutdown, +// this may be achieved with some timeout/lease. +// 2. DM-master UPDATE the bound relationship to the new DM-worker. +// 3. the new DM-worker GET upstream config, the expectant stage of the relay and the expectant stage of the subtasks. +// 4. the new DM-worker obey the expectant stage. +// +// when the leader of the DM-master cluster changed: +// 0. the old DM-master shutdown its operation. +// 1. the new DM-master GET all history information to restore the in-memory status. +// 2. the new DM-master continue to handle user requests and scheduler for upstream sources. +// +// the operation for expectant stage (both for the relay and subtasks): +// - New: +// - not a valid expectant stage. +// - always mark the expectant stage as Running for the first create. +// - Running (schedule the source to the DM-worker, resume-relay or start-task, resume-task): +// - create and start if the relay/subtask instance not exists. +// - resume when in Paused currently. +// - invalid for other current stages, do nothing. +// - Paused (pause-relay or pause-task): +// - do nothing if the relay/subtask instance not exists. +// - pause when in Running currently. +// - invalid for other current stages, do nothing. +// - Stopped (stop-relay or stop-task): +// - never exists for expectant stage in etcd but DELETE the relevant information. +// - do nothing if the relay/subtask instance not exists. +// - stop if the relay/subtask instance exists. +// - Finished: +// - never exists for expectant stage in etcd. diff --git a/pkg/ha/keepalive.go b/pkg/ha/keepalive.go new file mode 100644 index 0000000000..ad1445caa7 --- /dev/null +++ b/pkg/ha/keepalive.go @@ -0,0 +1,191 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package ha + +import ( + "context" + "encoding/json" + "time" + + "go.etcd.io/etcd/clientv3" + "go.etcd.io/etcd/mvcc/mvccpb" + "go.uber.org/zap" + + "github.com/pingcap/dm/dm/common" + "github.com/pingcap/dm/pkg/etcdutil" + "github.com/pingcap/dm/pkg/log" +) + +// WorkerEvent represents the PUT/DELETE keepalive event of DM-worker. +type WorkerEvent struct { + WorkerName string `json:"worker-name"` // the worker name of the worker. + JoinTime time.Time `json:"join-time"` // the time when worker start to keepalive with etcd + + // only used to report to the caller of the watcher, do not marsh it. + // if it's true, it means the worker has been deleted in etcd. + IsDeleted bool `json:"-"` +} + +// String implements Stringer interface. +func (w WorkerEvent) String() string { + str, _ := w.toJSON() + return str +} + +// toJSON returns the string of JSON represent. +func (w WorkerEvent) toJSON() (string, error) { + data, err := json.Marshal(w) + if err != nil { + return "", err + } + return string(data), nil +} + +// workerEventFromJSON constructs WorkerEvent from its JSON represent. +func workerEventFromJSON(s string) (w WorkerEvent, err error) { + err = json.Unmarshal([]byte(s), &w) + return +} + +func workerEventFromKey(key string) (WorkerEvent, error) { + var w WorkerEvent + ks, err := common.WorkerKeepAliveKeyAdapter.Decode(key) + if err != nil { + return w, err + } + w.WorkerName = ks[0] + return w, nil +} + +// KeepAlive puts the join time of the workerName into etcd. +// this key will be kept in etcd until the worker is blocked or failed +// k/v: workerName -> join time. +// TODO: fetch the actual master endpoints, the master member maybe changed. +func KeepAlive(ctx context.Context, cli *clientv3.Client, workerName string, keepAliveTTL int64) error { + cliCtx, cancel := context.WithTimeout(ctx, etcdutil.DefaultRequestTimeout) + defer cancel() + lease, err := cli.Grant(cliCtx, keepAliveTTL) + if err != nil { + return err + } + k := common.WorkerKeepAliveKeyAdapter.Encode(workerName) + workerEventJSON, err := WorkerEvent{ + WorkerName: workerName, + JoinTime: time.Now(), + }.toJSON() + if err != nil { + return err + } + _, err = cli.Put(cliCtx, k, workerEventJSON, clientv3.WithLease(lease.ID)) + if err != nil { + return err + } + ch, err := cli.KeepAlive(ctx, lease.ID) + if err != nil { + return err + } + for { + select { + case _, ok := <-ch: + if !ok { + log.L().Info("keep alive channel is closed") + return nil + } + case <-ctx.Done(): + log.L().Info("ctx is canceled, keepalive will exit now") + ctx, cancel := context.WithTimeout(cli.Ctx(), etcdutil.DefaultRequestTimeout) + cli.Revoke(ctx, lease.ID) + cancel() + return nil + } + } +} + +// WatchWorkerEvent watches the online and offline of workers from etcd. +// this function will output the worker event to evCh, output the error to errCh +func WatchWorkerEvent(ctx context.Context, cli *clientv3.Client, rev int64, outCh chan<- WorkerEvent, errCh chan<- error) { + watcher := clientv3.NewWatcher(cli) + ch := watcher.Watch(ctx, common.WorkerKeepAliveKeyAdapter.Path(), clientv3.WithPrefix(), clientv3.WithRev(rev)) + + for { + select { + case <-ctx.Done(): + log.L().Info("watch keepalive worker quit due to context canceled") + return + case resp := <-ch: + if resp.Canceled { + select { + case errCh <- resp.Err(): + case <-ctx.Done(): + } + return + } + + for _, ev := range resp.Events { + log.L().Info("receive dm-worker keep alive event", zap.String("operation", ev.Type.String()), zap.String("kv", string(ev.Kv.Key))) + var ( + event WorkerEvent + err error + ) + switch ev.Type { + case mvccpb.PUT: + event, err = workerEventFromJSON(string(ev.Kv.Value)) + case mvccpb.DELETE: + event, err = workerEventFromKey(string(ev.Kv.Key)) + event.IsDeleted = true + default: + // this should not happen. + log.L().Error("unsupported etcd event type", zap.Reflect("kv", ev.Kv), zap.Reflect("type", ev.Type)) + continue + } + if err != nil { + select { + case errCh <- err: + case <-ctx.Done(): + return + } + } else { + select { + case outCh <- event: + case <-ctx.Done(): + return + } + } + } + } + } +} + +// GetKeepAliveWorkers gets current alive workers, +// and returns a map{workerName: WorkerEvent}, revision and error +func GetKeepAliveWorkers(cli *clientv3.Client) (map[string]WorkerEvent, int64, error) { + ctx, cancel := context.WithTimeout(cli.Ctx(), etcdutil.DefaultRequestTimeout) + defer cancel() + + var wwm map[string]WorkerEvent + resp, err := cli.Get(ctx, common.WorkerKeepAliveKeyAdapter.Path(), clientv3.WithPrefix()) + if err != nil { + return wwm, 0, err + } + + wwm = make(map[string]WorkerEvent, len(resp.Kvs)) + for _, kv := range resp.Kvs { + w, err := workerEventFromJSON(string(kv.Value)) + if err != nil { + return wwm, 0, err + } + wwm[w.WorkerName] = w + } + return wwm, resp.Header.Revision, nil +} diff --git a/pkg/ha/keepalive_test.go b/pkg/ha/keepalive_test.go new file mode 100644 index 0000000000..a4f2f4ccb9 --- /dev/null +++ b/pkg/ha/keepalive_test.go @@ -0,0 +1,97 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package ha + +import ( + "context" + "strconv" + "sync/atomic" + "time" + + "github.com/pingcap/dm/pkg/utils" + + . "github.com/pingcap/check" +) + +// keepAliveTTL is set to 0 because the actual ttl is set to minLeaseTTL of etcd +// minLeaseTTL is 1 in etcd cluster +var keepAliveTTL = int64(0) + +func (t *testForEtcd) TestWorkerKeepAlive(c *C) { + defer clearTestInfoOperation(c) + wwm, rev, err := GetKeepAliveWorkers(etcdTestCli) + c.Assert(err, IsNil) + c.Assert(wwm, HasLen, 0) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + timeout := 2 * time.Second + evCh := make(chan WorkerEvent, 10) + errCh := make(chan error, 10) + closed := make(chan struct{}) + finished := int32(0) + + go func() { + WatchWorkerEvent(ctx, etcdTestCli, rev, evCh, errCh) + close(closed) + }() + + cancels := make([]context.CancelFunc, 0, 5) + for i := 1; i <= 5; i++ { + worker := "worker" + strconv.Itoa(i) + curTime := time.Now() + ctx1, cancel1 := context.WithCancel(ctx) + cancels = append(cancels, cancel1) + go func(ctx context.Context) { + err1 := KeepAlive(ctx, etcdTestCli, worker, keepAliveTTL) + c.Assert(err1, IsNil) + atomic.AddInt32(&finished, 1) + }(ctx1) + + select { + case ev := <-evCh: + c.Assert(ev.IsDeleted, IsFalse) + c.Assert(ev.WorkerName, Equals, worker) + c.Assert(ev.JoinTime.Before(curTime), IsFalse) + case <-time.After(timeout): + c.Fatal("fail to receive put ev " + strconv.Itoa(i) + " before timeout") + } + } + + for i, cancel1 := range cancels { + worker := "worker" + strconv.Itoa(i+1) + cancel1() + select { + case ev := <-evCh: + c.Assert(ev.IsDeleted, IsTrue) + c.Assert(ev.WorkerName, Equals, worker) + case <-time.After(timeout): + c.Fatal("fail to receive delete ev " + strconv.Itoa(i+1) + " before timeout") + } + } + + waitKeepAliveQuit := utils.WaitSomething(100, timeout, func() bool { + return atomic.LoadInt32(&finished) == 5 + }) + c.Assert(waitKeepAliveQuit, IsTrue) + + cancel() + select { + case <-closed: + case <-time.After(timeout): + c.Fatal("fail to quit WatchWorkerEvent before timeout") + } + c.Assert(errCh, HasLen, 0) +} diff --git a/pkg/ha/ops.go b/pkg/ha/ops.go new file mode 100644 index 0000000000..81d5d6764f --- /dev/null +++ b/pkg/ha/ops.go @@ -0,0 +1,96 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package ha + +import ( + "go.etcd.io/etcd/clientv3" + "go.etcd.io/etcd/mvcc/mvccpb" + + "github.com/pingcap/dm/dm/config" + "github.com/pingcap/dm/pkg/etcdutil" +) + +// PutRelayStageSourceBound puts the following data in one txn. +// - relay stage. +// - source bound relationship. +func PutRelayStageSourceBound(cli *clientv3.Client, stage Stage, bound SourceBound) (int64, error) { + ops1, err := putRelayStageOp(stage) + if err != nil { + return 0, err + } + op2, err := putSourceBoundOp(bound) + if err != nil { + return 0, err + } + ops := make([]clientv3.Op, 0, 2) + ops = append(ops, ops1...) + ops = append(ops, op2) + return etcdutil.DoOpsInOneTxn(cli, ops...) +} + +// DeleteSourceCfgRelayStageSourceBound deletes the following data in one txn. +// - upstream source config. +// - relay stage. +// - source bound relationship. +func DeleteSourceCfgRelayStageSourceBound(cli *clientv3.Client, source, worker string) (int64, error) { + sourceCfgOp := deleteSourceCfgOp(source) + relayStageOp := deleteRelayStageOp(source) + sourceBoundOp := deleteSourceBoundOp(worker) + return etcdutil.DoOpsInOneTxn(cli, sourceCfgOp, relayStageOp, sourceBoundOp) +} + +// PutSubTaskCfgStage puts the following data in one txn. +// - subtask config. +// - subtask stage. +// NOTE: golang can't use two `...` in the func, so use `[]` instead. +func PutSubTaskCfgStage(cli *clientv3.Client, cfgs []config.SubTaskConfig, stages []Stage) (int64, error) { + return opSubTaskCfgStage(cli, mvccpb.PUT, cfgs, stages) +} + +// DeleteSubTaskCfgStage deletes the following data in one txn. +// - subtask config. +// - subtask stage. +// NOTE: golang can't use two `...` in the func, so use `[]` instead. +func DeleteSubTaskCfgStage(cli *clientv3.Client, cfgs []config.SubTaskConfig, stages []Stage) (int64, error) { + return opSubTaskCfgStage(cli, mvccpb.DELETE, cfgs, stages) +} + +// opSubTaskCfgStage puts/deletes for subtask config and stage in one txn. +func opSubTaskCfgStage(cli *clientv3.Client, evType mvccpb.Event_EventType, + cfgs []config.SubTaskConfig, stages []Stage) (int64, error) { + var ( + ops1 []clientv3.Op + ops2 []clientv3.Op + err error + ) + switch evType { + case mvccpb.PUT: + ops1, err = putSubTaskCfgOp(cfgs...) + if err != nil { + return 0, err + } + ops2, err = putSubTaskStageOp(stages...) + if err != nil { + return 0, err + } + case mvccpb.DELETE: + ops1 = deleteSubTaskCfgOp(cfgs...) + ops2 = deleteSubTaskStageOp(stages...) + } + + ops := make([]clientv3.Op, 0, len(ops1)+len(ops2)) + ops = append(ops, ops1...) + ops = append(ops, ops2...) + return etcdutil.DoOpsInOneTxn(cli, ops...) +} diff --git a/pkg/ha/ops_test.go b/pkg/ha/ops_test.go new file mode 100644 index 0000000000..4a7532222b --- /dev/null +++ b/pkg/ha/ops_test.go @@ -0,0 +1,126 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package ha + +import ( + . "github.com/pingcap/check" + + "github.com/pingcap/dm/dm/config" + "github.com/pingcap/dm/dm/pb" +) + +func (t *testForEtcd) TestOpsEtcd(c *C) { + defer clearTestInfoOperation(c) + + var ( + source = "mysql-replica-1" + worker = "dm-worker-1" + task1 = "task-1" + task2 = "task-2" + relayStage = NewRelayStage(pb.Stage_Running, source) + subtaskStage1 = NewSubTaskStage(pb.Stage_Running, source, task1) + subtaskStage2 = NewSubTaskStage(pb.Stage_Running, source, task2) + emptyStage = Stage{} + bound = NewSourceBound(source, worker) + sourceCfg = config.SourceConfig{} + emptySourceCfg = config.SourceConfig{} + subtaskCfg1 = config.SubTaskConfig{} + ) + c.Assert(sourceCfg.LoadFromFile(sourceSampleFile), IsNil) + sourceCfg.SourceID = source + c.Assert(subtaskCfg1.DecodeFile(subTaskSampleFile), IsNil) + subtaskCfg1.SourceID = source + subtaskCfg1.Name = task1 + c.Assert(subtaskCfg1.Adjust(), IsNil) + subtaskCfg2 := subtaskCfg1 + subtaskCfg2.Name = task2 + c.Assert(subtaskCfg2.Adjust(), IsNil) + + // put relay stage and source bound. + rev1, err := PutRelayStageSourceBound(etcdTestCli, relayStage, bound) + c.Assert(err, IsNil) + c.Assert(rev1, Greater, int64(0)) + // put source config. + rev2, err := PutSourceCfg(etcdTestCli, sourceCfg) + c.Assert(err, IsNil) + c.Assert(rev2, Greater, rev1) + + // get them back. + st1, rev3, err := GetRelayStage(etcdTestCli, source) + c.Assert(err, IsNil) + c.Assert(rev3, Equals, rev2) + c.Assert(st1, DeepEquals, relayStage) + sbm1, rev3, err := GetSourceBound(etcdTestCli, worker) + c.Assert(err, IsNil) + c.Assert(rev3, Equals, rev2) + c.Assert(sbm1, HasLen, 1) + c.Assert(sbm1[worker], DeepEquals, bound) + soCfg1, rev3, err := GetSourceCfg(etcdTestCli, source, 0) + c.Assert(err, IsNil) + c.Assert(rev3, Equals, rev2) + c.Assert(soCfg1, DeepEquals, sourceCfg) + + // delete source config, relay stage and source bound. + rev4, err := DeleteSourceCfgRelayStageSourceBound(etcdTestCli, source, worker) + c.Assert(err, IsNil) + c.Assert(rev4, Greater, rev3) + + // try to get them back again. + st2, rev5, err := GetRelayStage(etcdTestCli, source) + c.Assert(err, IsNil) + c.Assert(rev5, Equals, int64(0)) + c.Assert(st2, Equals, emptyStage) + sbm2, rev5, err := GetSourceBound(etcdTestCli, worker) + c.Assert(err, IsNil) + c.Assert(rev5, Equals, int64(0)) + c.Assert(sbm2, HasLen, 0) + soCfg2, rev5, err := GetSourceCfg(etcdTestCli, source, 0) + c.Assert(err, IsNil) + c.Assert(rev5, Equals, int64(0)) + c.Assert(soCfg2, DeepEquals, emptySourceCfg) + + // put subtask config and subtask stage. + rev6, err := PutSubTaskCfgStage(etcdTestCli, []config.SubTaskConfig{subtaskCfg1, subtaskCfg2}, []Stage{subtaskStage1, subtaskStage2}) + c.Assert(err, IsNil) + c.Assert(rev6, Greater, rev5) + + // get them back. + stcm, rev7, err := GetSubTaskCfg(etcdTestCli, source, "", 0) + c.Assert(err, IsNil) + c.Assert(rev7, Equals, rev6) + c.Assert(stcm, HasLen, 2) + c.Assert(stcm[task1], DeepEquals, subtaskCfg1) + c.Assert(stcm[task2], DeepEquals, subtaskCfg2) + stsm, rev7, err := GetSubTaskStage(etcdTestCli, source, "") + c.Assert(err, IsNil) + c.Assert(rev7, Equals, rev6) + c.Assert(stsm, HasLen, 2) + c.Assert(stsm[task1], DeepEquals, subtaskStage1) + c.Assert(stsm[task2], DeepEquals, subtaskStage2) + + // delete them. + rev8, err := DeleteSubTaskCfgStage(etcdTestCli, []config.SubTaskConfig{subtaskCfg1, subtaskCfg2}, []Stage{subtaskStage1, subtaskStage2}) + c.Assert(err, IsNil) + c.Assert(rev8, Greater, rev7) + + // try to get them back again. + stcm, rev9, err := GetSubTaskCfg(etcdTestCli, source, "", 0) + c.Assert(err, IsNil) + c.Assert(rev9, Equals, int64(0)) + c.Assert(stcm, HasLen, 0) + stsm, rev9, err = GetSubTaskStage(etcdTestCli, source, "") + c.Assert(err, IsNil) + c.Assert(rev9, Equals, int64(0)) + c.Assert(stsm, HasLen, 0) +} diff --git a/pkg/ha/source.go b/pkg/ha/source.go new file mode 100644 index 0000000000..727ab88a75 --- /dev/null +++ b/pkg/ha/source.go @@ -0,0 +1,111 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package ha + +import ( + "context" + "fmt" + + "go.etcd.io/etcd/clientv3" + + "github.com/pingcap/dm/dm/common" + "github.com/pingcap/dm/dm/config" + "github.com/pingcap/dm/pkg/etcdutil" +) + +// PutSourceCfg puts the config of the upstream source into etcd. +// k/v: sourceID -> source config. +func PutSourceCfg(cli *clientv3.Client, cfg config.SourceConfig) (int64, error) { + value, err := cfg.Toml() + if err != nil { + return 0, err + } + key := common.UpstreamConfigKeyAdapter.Encode(cfg.SourceID) + + return etcdutil.DoOpsInOneTxn(cli, clientv3.OpPut(key, value)) +} + +// GetSourceCfg gets the config of the specified source. +// if the config for the source not exist, return with `err == nil` and `revision=0`. +func GetSourceCfg(cli *clientv3.Client, source string, rev int64) (config.SourceConfig, int64, error) { + ctx, cancel := context.WithTimeout(cli.Ctx(), etcdutil.DefaultRequestTimeout) + defer cancel() + + cfg := config.SourceConfig{} + resp, err := cli.Get(ctx, common.UpstreamConfigKeyAdapter.Encode(source), clientv3.WithRev(rev)) + if err != nil { + return cfg, 0, err + } + + if resp.Count == 0 { + return cfg, 0, nil + } else if resp.Count > 1 { + // TODO(csuzhangxc): add terror. + // this should not happen. + return cfg, 0, fmt.Errorf("too many config (%d) exist for the source %s", resp.Count, source) + } + + err = cfg.Parse(string(resp.Kvs[0].Value)) + if err != nil { + return cfg, 0, err + } + + return cfg, resp.Header.Revision, nil +} + +// GetAllSourceCfg gets all upstream source configs. +// k/v: source ID -> source config. +func GetAllSourceCfg(cli *clientv3.Client) (map[string]config.SourceConfig, int64, error) { + ctx, cancel := context.WithTimeout(cli.Ctx(), etcdutil.DefaultRequestTimeout) + defer cancel() + + resp, err := cli.Get(ctx, common.UpstreamConfigKeyAdapter.Path(), clientv3.WithPrefix()) + if err != nil { + return nil, 0, err + } + + scm := make(map[string]config.SourceConfig) + for _, kv := range resp.Kvs { + var cfg config.SourceConfig + err = cfg.Parse(string(kv.Value)) + if err != nil { + // TODO(csuzhangxc): add terror and including `key`. + return nil, 0, err + } + scm[cfg.SourceID] = cfg + } + + return scm, resp.Header.Revision, nil +} + +// deleteSourceCfgOp returns a DELETE etcd operation for the source config. +func deleteSourceCfgOp(source string) clientv3.Op { + return clientv3.OpDelete(common.UpstreamConfigKeyAdapter.Encode(source)) +} + +// ClearTestInfoOperation is used to clear all DM-HA relative etcd keys' information +// this function shouldn't be used in development environment +func ClearTestInfoOperation(cli *clientv3.Client) error { + clearSource := clientv3.OpDelete(common.UpstreamConfigKeyAdapter.Path(), clientv3.WithPrefix()) + clearSubTask := clientv3.OpDelete(common.UpstreamSubTaskKeyAdapter.Path(), clientv3.WithPrefix()) + clearWorkerInfo := clientv3.OpDelete(common.WorkerRegisterKeyAdapter.Path(), clientv3.WithPrefix()) + clearWorkerKeepAlive := clientv3.OpDelete(common.WorkerKeepAliveKeyAdapter.Path(), clientv3.WithPrefix()) + clearBound := clientv3.OpDelete(common.UpstreamBoundWorkerKeyAdapter.Path(), clientv3.WithPrefix()) + clearRelayStage := clientv3.OpDelete(common.StageRelayKeyAdapter.Path(), clientv3.WithPrefix()) + clearSubTaskStage := clientv3.OpDelete(common.StageSubTaskKeyAdapter.Path(), clientv3.WithPrefix()) + _, err := cli.Txn(context.Background()).Then( + clearSource, clearSubTask, clearWorkerInfo, clearBound, clearWorkerKeepAlive, clearRelayStage, clearSubTaskStage, + ).Commit() + return err +} diff --git a/pkg/ha/source_test.go b/pkg/ha/source_test.go new file mode 100644 index 0000000000..da46f93544 --- /dev/null +++ b/pkg/ha/source_test.go @@ -0,0 +1,107 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package ha + +import ( + "context" + "testing" + + . "github.com/pingcap/check" + "go.etcd.io/etcd/clientv3" + "go.etcd.io/etcd/integration" + + "github.com/pingcap/dm/dm/config" +) + +const ( + // do not forget to update this path if the file removed/renamed. + sourceSampleFile = "../../dm/worker/source.toml" +) + +var ( + etcdTestCli *clientv3.Client +) + +func TestHA(t *testing.T) { + mockCluster := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1}) + defer mockCluster.Terminate(t) + + etcdTestCli = mockCluster.RandClient() + + TestingT(t) +} + +// clear keys in etcd test cluster. +func clearTestInfoOperation(c *C) { + c.Assert(ClearTestInfoOperation(etcdTestCli), IsNil) +} + +type testForEtcd struct{} + +var _ = Suite(&testForEtcd{}) + +func (t *testForEtcd) TestSourceEtcd(c *C) { + defer clearTestInfoOperation(c) + + var ( + emptyCfg = config.SourceConfig{} + cfg = config.SourceConfig{} + ) + c.Assert(cfg.LoadFromFile(sourceSampleFile), IsNil) + source := cfg.SourceID + cfgExtra := cfg + cfgExtra.SourceID = "mysql-replica-2" + + // no source config exist. + cfg1, rev1, err := GetSourceCfg(etcdTestCli, source, 0) + c.Assert(err, IsNil) + c.Assert(rev1, Equals, int64(0)) + c.Assert(cfg1, DeepEquals, emptyCfg) + cfgM, _, err := GetAllSourceCfg(etcdTestCli) + c.Assert(err, IsNil) + c.Assert(cfgM, HasLen, 0) + + // put a source config. + rev2, err := PutSourceCfg(etcdTestCli, cfg) + c.Assert(err, IsNil) + c.Assert(rev2, Greater, rev1) + + // get the config back. + cfg2, rev3, err := GetSourceCfg(etcdTestCli, source, 0) + c.Assert(err, IsNil) + c.Assert(rev3, Equals, rev2) + c.Assert(cfg2, DeepEquals, cfg) + + // put another source config. + rev2, err = PutSourceCfg(etcdTestCli, cfgExtra) + c.Assert(err, IsNil) + + // get all two config. + cfgM, rev3, err = GetAllSourceCfg(etcdTestCli) + c.Assert(rev3, Equals, rev2) + c.Assert(cfgM, HasLen, 2) + c.Assert(cfgM[source], DeepEquals, cfg) + c.Assert(cfgM[cfgExtra.SourceID], DeepEquals, cfgExtra) + + // delete the config. + deleteOp := deleteSourceCfgOp(source) + _, err = etcdTestCli.Txn(context.Background()).Then(deleteOp).Commit() + c.Assert(err, IsNil) + + // get again, not exists now. + cfg3, rev4, err := GetSourceCfg(etcdTestCli, source, 0) + c.Assert(err, IsNil) + c.Assert(rev4, Equals, int64(0)) + c.Assert(cfg3, DeepEquals, emptyCfg) +} diff --git a/pkg/ha/stage.go b/pkg/ha/stage.go new file mode 100644 index 0000000000..76d00339dd --- /dev/null +++ b/pkg/ha/stage.go @@ -0,0 +1,363 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package ha + +import ( + "context" + "encoding/json" + "fmt" + + "go.etcd.io/etcd/clientv3" + "go.etcd.io/etcd/mvcc/mvccpb" + "go.uber.org/zap" + + "github.com/pingcap/dm/dm/common" + "github.com/pingcap/dm/dm/pb" + "github.com/pingcap/dm/pkg/etcdutil" + "github.com/pingcap/dm/pkg/log" +) + +// Stage represents the running stage for a relay or subtask. +type Stage struct { + Expect pb.Stage `json:"expect"` // the expectant stage. + Source string `json:"source"` // the source ID of the upstream. + Task string `json:"task,omitempty"` // the task name for subtask; empty for relay. + + // only used to report to the caller of the watcher, do not marsh it. + // if it's true, it means the stage has been deleted in etcd. + IsDeleted bool `json:"-"` + // only has value in watcher, will get 0 in GetStage + // record the etcd revision right after putting this Stage + Revision int64 `json:"-"` +} + +// NewRelayStage creates a new Stage instance for relay. +func NewRelayStage(expect pb.Stage, source string) Stage { + return newStage(expect, source, "") +} + +// NewSubTaskStage creates a new Stage instance for subtask. +func NewSubTaskStage(expect pb.Stage, source, task string) Stage { + return newStage(expect, source, task) +} + +// newStage creates a new Stage instance. +func newStage(expect pb.Stage, source, task string) Stage { + return Stage{ + Expect: expect, + Source: source, + Task: task, + } +} + +// String implements Stringer interface. +func (s Stage) String() string { + str, _ := s.toJSON() + return str +} + +// toJSON returns the string of JSON represent. +func (s Stage) toJSON() (string, error) { + data, err := json.Marshal(s) + if err != nil { + return "", err + } + return string(data), nil +} + +// stageFromJSON constructs Stage from its JSON represent. +func stageFromJSON(str string) (s Stage, err error) { + err = json.Unmarshal([]byte(str), &s) + return +} + +// PutRelayStage puts the stage of the relay into etcd. +// k/v: sourceID -> the running stage of the relay. +func PutRelayStage(cli *clientv3.Client, stages ...Stage) (int64, error) { + ops, err := putRelayStageOp(stages...) + if err != nil { + return 0, err + } + return etcdutil.DoOpsInOneTxn(cli, ops...) +} + +// PutSubTaskStage puts the stage of the subtask into etcd. +// k/v: sourceID, task -> the running stage of the subtask. +func PutSubTaskStage(cli *clientv3.Client, stages ...Stage) (int64, error) { + ops, err := putSubTaskStageOp(stages...) + if err != nil { + return 0, err + } + return etcdutil.DoOpsInOneTxn(cli, ops...) +} + +// GetRelayStage gets the relay stage for the specified upstream source. +// if the stage for the source not exist, return with `err == nil` and `revision=0`. +func GetRelayStage(cli *clientv3.Client, source string) (Stage, int64, error) { + ctx, cancel := context.WithTimeout(cli.Ctx(), etcdutil.DefaultRequestTimeout) + defer cancel() + + var stage Stage + resp, err := cli.Get(ctx, common.StageRelayKeyAdapter.Encode(source)) + if err != nil { + return stage, 0, err + } + + if resp.Count == 0 { + return stage, 0, nil + } else if resp.Count > 1 { + // TODO(csuzhangxc): add terror. + // this should not happen. + return stage, 0, fmt.Errorf("too many relay stage (%d) exist for source %s", resp.Count, source) + } + + stage, err = stageFromJSON(string(resp.Kvs[0].Value)) + if err != nil { + return stage, 0, err + } + + return stage, resp.Header.Revision, nil +} + +// GetAllRelayStage gets all relay stages. +// k/v: source ID -> relay stage. +func GetAllRelayStage(cli *clientv3.Client) (map[string]Stage, int64, error) { + ctx, cancel := context.WithTimeout(cli.Ctx(), etcdutil.DefaultRequestTimeout) + defer cancel() + + resp, err := cli.Get(ctx, common.StageRelayKeyAdapter.Path(), clientv3.WithPrefix()) + if err != nil { + return nil, 0, err + } + + stages := make(map[string]Stage) + for _, kv := range resp.Kvs { + stage, err2 := stageFromJSON(string(kv.Value)) + if err2 != nil { + return nil, 0, err2 + } + stages[stage.Source] = stage + } + return stages, resp.Header.Revision, nil +} + +// GetSubTaskStage gets the subtask stage for the specified upstream source and task name. +// if the stage for the source and task name not exist, return with `err == nil` and `revision=0`. +// if task name is "", it will return all subtasks' stage as a map{task-name: stage} for the source. +// if task name is given, it will return a map{task-name: stage} whose length is 1. +func GetSubTaskStage(cli *clientv3.Client, source, task string) (map[string]Stage, int64, error) { + ctx, cancel := context.WithTimeout(cli.Ctx(), etcdutil.DefaultRequestTimeout) + defer cancel() + + var ( + stm = make(map[string]Stage) + resp *clientv3.GetResponse + err error + ) + if task != "" { + resp, err = cli.Get(ctx, common.StageSubTaskKeyAdapter.Encode(source, task)) + } else { + resp, err = cli.Get(ctx, common.StageSubTaskKeyAdapter.Encode(source), clientv3.WithPrefix()) + } + + if err != nil { + return stm, 0, err + } + + if resp.Count == 0 { + return stm, 0, nil + } else if task != "" && resp.Count > 1 { + return stm, 0, fmt.Errorf("too many stage (%d) exist for subtask {sourceID: %s, task name: %s}", resp.Count, source, task) + } + + for _, kvs := range resp.Kvs { + stage, err2 := stageFromJSON(string(kvs.Value)) + if err2 != nil { + return stm, 0, err2 + } + stm[stage.Task] = stage + } + + return stm, resp.Header.Revision, nil +} + +// GetAllSubTaskStage gets all subtask stages. +// k/v: source ID -> task name -> subtask stage. +func GetAllSubTaskStage(cli *clientv3.Client) (map[string]map[string]Stage, int64, error) { + ctx, cancel := context.WithTimeout(cli.Ctx(), etcdutil.DefaultRequestTimeout) + defer cancel() + + resp, err := cli.Get(ctx, common.StageSubTaskKeyAdapter.Path(), clientv3.WithPrefix()) + if err != nil { + return nil, 0, err + } + + stages := make(map[string]map[string]Stage) + for _, kvs := range resp.Kvs { + stage, err2 := stageFromJSON(string(kvs.Value)) + if err2 != nil { + return nil, 0, err2 + } + if _, ok := stages[stage.Source]; !ok { + stages[stage.Source] = make(map[string]Stage) + } + stages[stage.Source][stage.Task] = stage + } + + return stages, resp.Header.Revision, nil +} + +// WatchRelayStage watches PUT & DELETE operations for the relay stage. +// for the DELETE stage, it returns an empty stage. +func WatchRelayStage(ctx context.Context, cli *clientv3.Client, + source string, revision int64, outCh chan<- Stage, errCh chan<- error) { + ch := cli.Watch(ctx, common.StageRelayKeyAdapter.Encode(source), clientv3.WithRev(revision)) + watchStage(ctx, ch, relayStageFromKey, outCh, errCh) +} + +// WatchSubTaskStage watches PUT & DELETE operations for the subtask stage. +// for the DELETE stage, it returns an empty stage. +func WatchSubTaskStage(ctx context.Context, cli *clientv3.Client, + source string, revision int64, outCh chan<- Stage, errCh chan<- error) { + ch := cli.Watch(ctx, common.StageSubTaskKeyAdapter.Encode(source), clientv3.WithPrefix(), clientv3.WithRev(revision)) + watchStage(ctx, ch, subTaskStageFromKey, outCh, errCh) +} + +// DeleteSubTaskStage deletes the subtask stage. +func DeleteSubTaskStage(cli *clientv3.Client, stages ...Stage) (int64, error) { + ops := deleteSubTaskStageOp(stages...) + return etcdutil.DoOpsInOneTxn(cli, ops...) +} + +// relayStageFromKey constructs an incomplete relay stage from an etcd key. +func relayStageFromKey(key string) (Stage, error) { + var stage Stage + ks, err := common.StageRelayKeyAdapter.Decode(key) + if err != nil { + return stage, err + } + stage.Source = ks[0] + return stage, nil +} + +// subTaskStageFromKey constructs an incomplete subtask stage from an etcd key. +func subTaskStageFromKey(key string) (Stage, error) { + var stage Stage + ks, err := common.StageSubTaskKeyAdapter.Decode(key) + if err != nil { + return stage, err + } + stage.Source = ks[0] + stage.Task = ks[1] + return stage, nil +} + +// watchStage watches PUT & DELETE operations for the stage. +func watchStage(ctx context.Context, watchCh clientv3.WatchChan, + stageFromKey func(key string) (Stage, error), outCh chan<- Stage, errCh chan<- error) { + for { + select { + case <-ctx.Done(): + return + case resp := <-watchCh: + if resp.Canceled { + // TODO(csuzhangxc): do retry here. + if resp.Err() != nil { + select { + case errCh <- resp.Err(): + case <-ctx.Done(): + } + } + return + } + + for _, ev := range resp.Events { + var ( + stage Stage + err error + ) + switch ev.Type { + case mvccpb.PUT: + stage, err = stageFromJSON(string(ev.Kv.Value)) + case mvccpb.DELETE: + stage, err = stageFromKey(string(ev.Kv.Key)) + stage.IsDeleted = true + default: + // this should not happen. + log.L().Error("unsupported etcd event type", zap.Reflect("kv", ev.Kv), zap.Reflect("type", ev.Type)) + continue + } + stage.Revision = ev.Kv.ModRevision + + if err != nil { + select { + case errCh <- err: + case <-ctx.Done(): + return + } + } else { + select { + case outCh <- stage: + case <-ctx.Done(): + return + } + } + } + } + } +} + +// putRelayStageOp returns a list of PUT etcd operation for the relay stage. +// k/v: sourceID -> the running stage of the relay. +func putRelayStageOp(stages ...Stage) ([]clientv3.Op, error) { + ops := make([]clientv3.Op, 0, len(stages)) + for _, stage := range stages { + value, err := stage.toJSON() + if err != nil { + return ops, err + } + key := common.StageRelayKeyAdapter.Encode(stage.Source) + ops = append(ops, clientv3.OpPut(key, value)) + } + return ops, nil +} + +// putSubTaskStageOp returns a list of PUT etcd operations for the subtask stage. +// k/v: sourceID, task -> the running stage of the subtask. +func putSubTaskStageOp(stages ...Stage) ([]clientv3.Op, error) { + ops := make([]clientv3.Op, 0, len(stages)) + for _, stage := range stages { + value, err := stage.toJSON() + if err != nil { + return ops, err + } + key := common.StageSubTaskKeyAdapter.Encode(stage.Source, stage.Task) + ops = append(ops, clientv3.OpPut(key, value)) + } + return ops, nil +} + +// deleteRelayStageOp returns a DELETE etcd operation for the relay stage. +func deleteRelayStageOp(source string) clientv3.Op { + return clientv3.OpDelete(common.StageRelayKeyAdapter.Encode(source)) +} + +// deleteSubTaskStageOp returns a list of DELETE etcd operation for the subtask stage. +func deleteSubTaskStageOp(stages ...Stage) []clientv3.Op { + ops := make([]clientv3.Op, 0, len(stages)) + for _, stage := range stages { + ops = append(ops, clientv3.OpDelete(common.StageSubTaskKeyAdapter.Encode(stage.Source, stage.Task))) + } + return ops +} diff --git a/pkg/ha/stage_test.go b/pkg/ha/stage_test.go new file mode 100644 index 0000000000..3ca1699d1f --- /dev/null +++ b/pkg/ha/stage_test.go @@ -0,0 +1,216 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package ha + +import ( + "context" + "time" + + . "github.com/pingcap/check" + + "github.com/pingcap/dm/dm/pb" +) + +func (t *testForEtcd) TestStageJSON(c *C) { + // stage for relay. + rs1 := NewRelayStage(pb.Stage_Running, "mysql-replica-1") + j, err := rs1.toJSON() + c.Assert(err, IsNil) + c.Assert(j, Equals, `{"expect":2,"source":"mysql-replica-1"}`) + c.Assert(j, Equals, rs1.String()) + + rs2, err := stageFromJSON(j) + c.Assert(err, IsNil) + c.Assert(rs2, DeepEquals, rs1) + + // stage for subtask. + sts1 := NewSubTaskStage(pb.Stage_Paused, "mysql-replica-1", "task1") + j, err = sts1.toJSON() + c.Assert(err, IsNil) + c.Assert(j, Equals, `{"expect":3,"source":"mysql-replica-1","task":"task1"}`) + c.Assert(j, Equals, sts1.String()) + + sts2, err := stageFromJSON(j) + c.Assert(err, IsNil) + c.Assert(sts2, DeepEquals, sts1) +} + +func (t *testForEtcd) TestRelayStageEtcd(c *C) { + defer clearTestInfoOperation(c) + + var ( + watchTimeout = 500 * time.Millisecond + source1 = "mysql-replica-1" + source2 = "mysql-replica-2" + emptyStage = Stage{} + stage1 = NewRelayStage(pb.Stage_Running, source1) + stage2 = NewRelayStage(pb.Stage_Paused, source2) + ) + c.Assert(stage1.IsDeleted, IsFalse) + + // no relay stage exist. + st1, rev1, err := GetRelayStage(etcdTestCli, source1) + c.Assert(err, IsNil) + c.Assert(rev1, Equals, int64(0)) + c.Assert(st1, DeepEquals, emptyStage) + + // put two stage. + rev2, err := PutRelayStage(etcdTestCli, stage1, stage2) + c.Assert(err, IsNil) + c.Assert(rev2, Greater, rev1) + + // watch the PUT operation for stage1. + stageCh := make(chan Stage, 10) + errCh := make(chan error, 10) + ctx, cancel := context.WithTimeout(context.Background(), watchTimeout) + WatchRelayStage(ctx, etcdTestCli, source1, rev2, stageCh, errCh) + cancel() + close(stageCh) + close(errCh) + c.Assert(len(stageCh), Equals, 1) + stage1.Revision = rev2 + c.Assert(<-stageCh, DeepEquals, stage1) + c.Assert(len(errCh), Equals, 0) + + // get stage1 back. + st2, rev3, err := GetRelayStage(etcdTestCli, source1) + c.Assert(err, IsNil) + c.Assert(rev3, Equals, rev2) + stage1.Revision = 0 + c.Assert(st2, DeepEquals, stage1) + + // get two stages. + stm, rev3, err := GetAllRelayStage(etcdTestCli) + c.Assert(err, IsNil) + c.Assert(rev3, Equals, rev2) + c.Assert(stm, HasLen, 2) + c.Assert(stm[source1], DeepEquals, stage1) + c.Assert(stm[source2], DeepEquals, stage2) + + // delete stage1. + deleteOp := deleteRelayStageOp(source1) + resp, err := etcdTestCli.Txn(context.Background()).Then(deleteOp).Commit() + c.Assert(err, IsNil) + rev4 := resp.Header.Revision + c.Assert(rev4, Greater, rev3) + + // watch the DELETE operation for stage1. + stageCh = make(chan Stage, 10) + errCh = make(chan error, 10) + ctx, cancel = context.WithTimeout(context.Background(), watchTimeout) + WatchRelayStage(ctx, etcdTestCli, source1, rev4, stageCh, errCh) + cancel() + close(stageCh) + close(errCh) + c.Assert(len(stageCh), Equals, 1) + st3 := <-stageCh + c.Assert(st3.IsDeleted, IsTrue) + c.Assert(len(errCh), Equals, 0) + + // get again, not exists now. + st4, rev5, err := GetRelayStage(etcdTestCli, source1) + c.Assert(err, IsNil) + c.Assert(rev5, Equals, int64(0)) + c.Assert(st4, DeepEquals, emptyStage) +} + +func (t *testForEtcd) TestSubTaskStageEtcd(c *C) { + defer clearTestInfoOperation(c) + + var ( + watchTimeout = 500 * time.Millisecond + source = "mysql-replica-1" + task1 = "task-1" + task2 = "task-2" + stage1 = NewSubTaskStage(pb.Stage_Running, source, task1) + stage2 = NewSubTaskStage(pb.Stage_Paused, source, task2) + ) + + // no stage exists. + st1, rev1, err := GetSubTaskStage(etcdTestCli, source, task1) + c.Assert(err, IsNil) + c.Assert(rev1, Equals, int64(0)) + c.Assert(st1, HasLen, 0) + + // put two stages. + rev2, err := PutSubTaskStage(etcdTestCli, stage1, stage2) + c.Assert(err, IsNil) + c.Assert(rev2, Greater, rev1) + + // watch the PUT operation for stages. + stageCh := make(chan Stage, 10) + errCh := make(chan error, 10) + ctx, cancel := context.WithTimeout(context.Background(), watchTimeout) + WatchSubTaskStage(ctx, etcdTestCli, source, rev2, stageCh, errCh) + cancel() + close(stageCh) + close(errCh) + c.Assert(len(stageCh), Equals, 2) + stage1.Revision = rev2 + stage2.Revision = rev2 + c.Assert(<-stageCh, DeepEquals, stage1) + c.Assert(<-stageCh, DeepEquals, stage2) + c.Assert(len(errCh), Equals, 0) + + stage1.Revision = 0 + stage2.Revision = 0 + // get stages back without specified task. + stm, rev3, err := GetSubTaskStage(etcdTestCli, source, "") + c.Assert(err, IsNil) + c.Assert(rev3, Equals, rev2) + c.Assert(stm, HasLen, 2) + c.Assert(stm[task1], DeepEquals, stage1) + c.Assert(stm[task2], DeepEquals, stage2) + + // get the stage back with specified task. + stm, rev3, err = GetSubTaskStage(etcdTestCli, source, task1) + c.Assert(err, IsNil) + c.Assert(rev3, Equals, rev2) + c.Assert(stm, HasLen, 1) + c.Assert(stm[task1], DeepEquals, stage1) + + // get all stages. + stmm, rev3, err := GetAllSubTaskStage(etcdTestCli) + c.Assert(err, IsNil) + c.Assert(rev3, Equals, rev2) + c.Assert(stmm, HasLen, 1) + c.Assert(stmm[source], HasLen, 2) + c.Assert(stmm[source][task1], DeepEquals, stage1) + c.Assert(stmm[source][task2], DeepEquals, stage2) + + // delete two stages. + rev4, err := DeleteSubTaskStage(etcdTestCli, stage1, stage2) + c.Assert(err, IsNil) + c.Assert(rev4, Greater, rev3) + + // watch the DELETE operation for stages. + stageCh = make(chan Stage, 10) + errCh = make(chan error, 10) + ctx, cancel = context.WithTimeout(context.Background(), watchTimeout) + WatchSubTaskStage(ctx, etcdTestCli, source, rev4, stageCh, errCh) + cancel() + close(stageCh) + close(errCh) + c.Assert(len(stageCh), Equals, 2) + for st2 := range stageCh { + c.Assert(st2.IsDeleted, IsTrue) + } + c.Assert(len(errCh), Equals, 0) + + // get again, not exists now. + stm, rev5, err := GetSubTaskStage(etcdTestCli, source, task1) + c.Assert(err, IsNil) + c.Assert(rev5, Equals, int64(0)) + c.Assert(stm, HasLen, 0) +} diff --git a/pkg/ha/subtask.go b/pkg/ha/subtask.go new file mode 100644 index 0000000000..abb99ed2c1 --- /dev/null +++ b/pkg/ha/subtask.go @@ -0,0 +1,131 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package ha + +import ( + "context" + "fmt" + + "go.etcd.io/etcd/clientv3" + + "github.com/pingcap/dm/dm/common" + "github.com/pingcap/dm/dm/config" + "github.com/pingcap/dm/pkg/etcdutil" +) + +// PutSubTaskCfg puts the subtask configs of the specified source and task name into etcd. +// k/k/v: sourceID, taskName -> subtask config. +func PutSubTaskCfg(cli *clientv3.Client, cfgs ...config.SubTaskConfig) (int64, error) { + ops, err := putSubTaskCfgOp(cfgs...) + if err != nil { + return 0, err + } + + return etcdutil.DoOpsInOneTxn(cli, ops...) +} + +// GetSubTaskCfg gets the subtask config of the specified source and task name. +// if the config for the source not exist, return with `err == nil` and `revision=0`. +// if taskName is "", will return all the subtaskConfigs as a map{taskName: subtaskConfig} of the source +// if taskName if given, will return a map{taskName: subtaskConfig} whose length is 1 +func GetSubTaskCfg(cli *clientv3.Client, source, taskName string, rev int64) (map[string]config.SubTaskConfig, int64, error) { + ctx, cancel := context.WithTimeout(cli.Ctx(), etcdutil.DefaultRequestTimeout) + defer cancel() + + tsm := make(map[string]config.SubTaskConfig) + var ( + resp *clientv3.GetResponse + err error + ) + if taskName != "" { + resp, err = cli.Get(ctx, common.UpstreamSubTaskKeyAdapter.Encode(source, taskName), clientv3.WithRev(rev)) + } else { + resp, err = cli.Get(ctx, common.UpstreamSubTaskKeyAdapter.Encode(source), clientv3.WithPrefix(), clientv3.WithRev(rev)) + } + + if err != nil { + return tsm, 0, err + } + + if resp.Count == 0 { + return tsm, 0, nil + } else if taskName != "" && resp.Count > 1 { + // TODO(lichunzhu): add terror. + // this should not happen. + return tsm, 0, fmt.Errorf("too many config (%d) exist for the subtask {sourceID: %s, task name: %s}", resp.Count, source, taskName) + } + + for _, kvs := range resp.Kvs { + cfg := config.SubTaskConfig{} + err = cfg.Decode(string(kvs.Value)) + if err != nil { + return tsm, 0, err + } + + tsm[cfg.Name] = cfg + } + + return tsm, resp.Header.Revision, nil +} + +// GetAllSubTaskCfg gets all subtask configs. +// k/v: source ID -> task name -> subtask config +func GetAllSubTaskCfg(cli *clientv3.Client) (map[string]map[string]config.SubTaskConfig, int64, error) { + ctx, cancel := context.WithTimeout(cli.Ctx(), etcdutil.DefaultRequestTimeout) + defer cancel() + + resp, err := cli.Get(ctx, common.UpstreamSubTaskKeyAdapter.Path(), clientv3.WithPrefix()) + + if err != nil { + return nil, 0, err + } + + cfgs := make(map[string]map[string]config.SubTaskConfig) + for _, kvs := range resp.Kvs { + cfg := config.SubTaskConfig{} + err = cfg.Decode(string(kvs.Value)) + if err != nil { + return nil, 0, err + } + if _, ok := cfgs[cfg.SourceID]; !ok { + cfgs[cfg.SourceID] = make(map[string]config.SubTaskConfig) + } + cfgs[cfg.SourceID][cfg.Name] = cfg + } + + return cfgs, resp.Header.Revision, nil +} + +// putSubTaskCfgOp returns a PUT etcd operation for the subtask config. +func putSubTaskCfgOp(cfgs ...config.SubTaskConfig) ([]clientv3.Op, error) { + ops := make([]clientv3.Op, 0, len(cfgs)) + for _, cfg := range cfgs { + value, err := cfg.Toml() + if err != nil { + return ops, err + } + key := common.UpstreamSubTaskKeyAdapter.Encode(cfg.SourceID, cfg.Name) + ops = append(ops, clientv3.OpPut(key, value)) + } + return ops, nil +} + +// deleteSubTaskCfgOp returns a DELETE etcd operation for the subtask config. +func deleteSubTaskCfgOp(cfgs ...config.SubTaskConfig) []clientv3.Op { + ops := make([]clientv3.Op, 0, len(cfgs)) + for _, cfg := range cfgs { + ops = append(ops, clientv3.OpDelete(common.UpstreamSubTaskKeyAdapter.Encode(cfg.SourceID, cfg.Name))) + } + return ops +} diff --git a/pkg/ha/subtask_test.go b/pkg/ha/subtask_test.go new file mode 100644 index 0000000000..f41f934a99 --- /dev/null +++ b/pkg/ha/subtask_test.go @@ -0,0 +1,113 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package ha + +import ( + "context" + + . "github.com/pingcap/check" + + "github.com/pingcap/dm/dm/config" +) + +const ( + // do not forget to update this path if the file removed/renamed. + subTaskSampleFile = "../../dm/worker/subtask.toml" +) + +func (t *testForEtcd) TestSubTaskEtcd(c *C) { + defer clearTestInfoOperation(c) + + cfg1 := config.SubTaskConfig{} + c.Assert(cfg1.DecodeFile(subTaskSampleFile), IsNil) + source := cfg1.SourceID + taskName1 := cfg1.Name + + taskName2 := taskName1 + "2" + cfg2 := cfg1 + cfg2.Name = taskName2 + err := cfg2.Adjust() + c.Assert(err, IsNil) + + // no subtask config exist. + tsm1, rev1, err := GetSubTaskCfg(etcdTestCli, source, taskName1, 0) + c.Assert(err, IsNil) + c.Assert(rev1, Equals, int64(0)) + c.Assert(tsm1, HasLen, 0) + + // put subtask configs. + rev2, err := PutSubTaskCfg(etcdTestCli, cfg1, cfg2) + c.Assert(err, IsNil) + c.Assert(rev2, Greater, rev1) + + // get single config back. + tsm2, rev3, err := GetSubTaskCfg(etcdTestCli, source, taskName1, 0) + c.Assert(err, IsNil) + c.Assert(rev3, Equals, rev2) + c.Assert(tsm2, HasLen, 1) + c.Assert(tsm2, HasKey, taskName1) + c.Assert(tsm2[taskName1], DeepEquals, cfg1) + + tsm3, rev4, err := GetSubTaskCfg(etcdTestCli, source, "", 0) + c.Assert(err, IsNil) + c.Assert(rev4, Equals, rev3) + c.Assert(tsm3, HasLen, 2) + c.Assert(tsm3, HasKey, taskName1) + c.Assert(tsm3, HasKey, taskName2) + c.Assert(tsm3[taskName1], DeepEquals, cfg1) + c.Assert(tsm3[taskName2], DeepEquals, cfg2) + + // get all subtask configs. + stmm, rev4, err := GetAllSubTaskCfg(etcdTestCli) + c.Assert(err, IsNil) + c.Assert(rev4, Equals, rev3) + c.Assert(stmm, HasLen, 1) + c.Assert(stmm[source], HasLen, 2) + c.Assert(stmm[source][taskName1], DeepEquals, cfg1) + c.Assert(stmm[source][taskName2], DeepEquals, cfg2) + + // delete the config. + deleteOps := deleteSubTaskCfgOp(cfg1) + _, err = etcdTestCli.Txn(context.Background()).Then(deleteOps...).Commit() + c.Assert(err, IsNil) + deleteOps = deleteSubTaskCfgOp(cfg2) + _, err = etcdTestCli.Txn(context.Background()).Then(deleteOps...).Commit() + c.Assert(err, IsNil) + + // get again, not exists now. + tsm4, rev5, err := GetSubTaskCfg(etcdTestCli, source, taskName1, 0) + c.Assert(err, IsNil) + c.Assert(rev5, Equals, int64(0)) + c.Assert(tsm4, HasLen, 0) + + // put subtask config. + rev6, err := PutSubTaskCfg(etcdTestCli, cfg1) + c.Assert(err, IsNil) + c.Assert(rev6, Greater, int64(0)) + + // update subtask config. + cfg3 := cfg1 + cfg3.SourceID = "testForRevision" + rev7, err := PutSubTaskCfg(etcdTestCli, cfg3) + c.Assert(err, IsNil) + c.Assert(rev7, Greater, rev6) + + // get subtask from rev6. shoule be equal to cfg1 + tsm5, rev8, err := GetSubTaskCfg(etcdTestCli, source, taskName1, rev6) + c.Assert(err, IsNil) + c.Assert(rev8, Equals, rev7) + c.Assert(tsm5, HasLen, 1) + c.Assert(tsm5, HasKey, taskName1) + c.Assert(tsm5[taskName1], DeepEquals, cfg1) +} diff --git a/pkg/ha/worker.go b/pkg/ha/worker.go new file mode 100644 index 0000000000..f3035a3fc1 --- /dev/null +++ b/pkg/ha/worker.go @@ -0,0 +1,100 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package ha + +import ( + "context" + "encoding/json" + + "go.etcd.io/etcd/clientv3" + + "github.com/pingcap/dm/dm/common" + "github.com/pingcap/dm/pkg/etcdutil" +) + +// WorkerInfo represents the node information of the DM-worker. +type WorkerInfo struct { + Name string `json:"name"` // the name of the node. + Addr string `json:"addr"` // the client address of the node to advertise. +} + +// NewWorkerInfo creates a new WorkerInfo instance. +func NewWorkerInfo(name, addr string) WorkerInfo { + return WorkerInfo{ + Name: name, + Addr: addr, + } +} + +// String implements Stringer interface. +func (i WorkerInfo) String() string { + s, _ := i.toJSON() + return s +} + +// toJSON returns the string of JSON represent. +func (i WorkerInfo) toJSON() (string, error) { + data, err := json.Marshal(i) + if err != nil { + return "", err + } + return string(data), nil +} + +// workerInfoFromJSON constructs WorkerInfo from its JSON represent. +func workerInfoFromJSON(s string) (i WorkerInfo, err error) { + err = json.Unmarshal([]byte(s), &i) + return +} + +// PutWorkerInfo puts the DM-worker info into etcd. +// k/v: worker-name -> worker information. +func PutWorkerInfo(cli *clientv3.Client, info WorkerInfo) (int64, error) { + value, err := info.toJSON() + if err != nil { + return 0, err + } + key := common.WorkerRegisterKeyAdapter.Encode(info.Name) + + return etcdutil.DoOpsInOneTxn(cli, clientv3.OpPut(key, value)) +} + +// GetAllWorkerInfo gets all DM-worker info in etcd currently. +// k/v: worker-name -> worker information. +func GetAllWorkerInfo(cli *clientv3.Client) (map[string]WorkerInfo, int64, error) { + ctx, cancel := context.WithTimeout(cli.Ctx(), etcdutil.DefaultRequestTimeout) + defer cancel() + + resp, err := cli.Get(ctx, common.WorkerRegisterKeyAdapter.Path(), clientv3.WithPrefix()) + if err != nil { + return nil, 0, err + } + + ifm := make(map[string]WorkerInfo) + for _, kv := range resp.Kvs { + info, err2 := workerInfoFromJSON(string(kv.Value)) + if err2 != nil { + return nil, 0, err2 + } + + ifm[info.Name] = info + } + + return ifm, resp.Header.Revision, nil +} + +// DeleteWorkerInfo deletes the specified DM-worker information. +func DeleteWorkerInfo(cli *clientv3.Client, worker string) (int64, error) { + return etcdutil.DoOpsInOneTxn(cli, clientv3.OpDelete(common.WorkerRegisterKeyAdapter.Encode(worker))) +} diff --git a/pkg/ha/worker_test.go b/pkg/ha/worker_test.go new file mode 100644 index 0000000000..d731349879 --- /dev/null +++ b/pkg/ha/worker_test.go @@ -0,0 +1,75 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package ha + +import ( + . "github.com/pingcap/check" +) + +func (t *testForEtcd) TestWorkerInfoJSON(c *C) { + i1 := NewWorkerInfo("dm-worker-1", "192.168.0.100:8262") + + j, err := i1.toJSON() + c.Assert(err, IsNil) + c.Assert(j, Equals, `{"name":"dm-worker-1","addr":"192.168.0.100:8262"}`) + c.Assert(j, Equals, i1.String()) + + i2, err := workerInfoFromJSON(j) + c.Assert(err, IsNil) + c.Assert(i2, DeepEquals, i1) +} + +func (t *testForEtcd) TestWorkerInfoEtcd(c *C) { + defer clearTestInfoOperation(c) + + var ( + worker1 = "dm-worker-1" + worker2 = "dm-worker-2" + info1 = NewWorkerInfo(worker1, "192.168.0.100:8262") + info2 = NewWorkerInfo(worker2, "192.168.0.101:8262") + ) + + // get without info. + ifm, _, err := GetAllWorkerInfo(etcdTestCli) + c.Assert(err, IsNil) + c.Assert(ifm, HasLen, 0) + + // put two info. + rev1, err := PutWorkerInfo(etcdTestCli, info1) + c.Assert(err, IsNil) + c.Assert(rev1, Greater, int64(0)) + rev2, err := PutWorkerInfo(etcdTestCli, info2) + c.Assert(err, IsNil) + c.Assert(rev2, Greater, rev1) + + // get again, with two info. + ifm, rev3, err := GetAllWorkerInfo(etcdTestCli) + c.Assert(err, IsNil) + c.Assert(rev3, Equals, rev2) + c.Assert(ifm, HasLen, 2) + c.Assert(ifm[worker1], DeepEquals, info1) + c.Assert(ifm[worker2], DeepEquals, info2) + + // delete info1. + rev4, err := DeleteWorkerInfo(etcdTestCli, worker1) + c.Assert(err, IsNil) + c.Assert(rev4, Greater, rev3) + + // get again, with only one info. + ifm, rev5, err := GetAllWorkerInfo(etcdTestCli) + c.Assert(err, IsNil) + c.Assert(rev5, Equals, rev4) + c.Assert(ifm, HasLen, 1) + c.Assert(ifm[worker2], DeepEquals, info2) +} diff --git a/pkg/terror/error_list.go b/pkg/terror/error_list.go index c519f9bde4..f05ecd2b73 100644 --- a/pkg/terror/error_list.go +++ b/pkg/terror/error_list.go @@ -403,8 +403,7 @@ const ( codeMasterStartEmbedEtcdFail codeMasterParseURLFail codeMasterJoinEmbedEtcdFail - codeMasterCoordinatorNotStart - codeMasterAcquireWorkerFailed + codeMasterInvalidOperateTaskOp codeMasterAdvertiseAddrNotValid codeMasterRequestIsNotForwardToLeader ) @@ -511,6 +510,27 @@ const ( codeSchemaTrackerCannotParseDownstreamTable ) +// HA scheduler. +const ( + codeSchedulerNotStarted ErrCode = iota + 46001 + codeSchedulerStarted + codeSchedulerWorkerExist + codeSchedulerWorkerNotExist + codeSchedulerWorkerOnline + codeSchedulerWorkerInvalidTrans + codeSchedulerSourceCfgExist + codeSchedulerSourceCfgNotExist + codeSchedulerSourcesUnbound + codeSchedulerSourceOpTaskExist + codeSchedulerRelayStageInvalidUpdate + codeSchedulerRelayStageSourceNotExist + codeSchedulerMultiTask + codeSchedulerSubTaskExist + codeSchedulerSubTaskStageInvalidUpdate + codeSchedulerSubTaskOpTaskNotExist + codeSchedulerSubTaskOpSourceNotExist +) + // Error instances var ( // Database operation related error @@ -881,8 +901,7 @@ var ( ErrMasterStartEmbedEtcdFail = New(codeMasterStartEmbedEtcdFail, ClassDMMaster, ScopeInternal, LevelHigh, "fail to start embed etcd") ErrMasterParseURLFail = New(codeMasterParseURLFail, ClassDMMaster, ScopeInternal, LevelHigh, "fail to parse URL %s") ErrMasterJoinEmbedEtcdFail = New(codeMasterJoinEmbedEtcdFail, ClassDMMaster, ScopeInternal, LevelHigh, "fail to join embed etcd: %s") - ErrMasterCoordinatorNotStart = New(codeMasterCoordinatorNotStart, ClassDMMaster, ScopeInternal, LevelHigh, "coordinator does not start") - ErrMasterAcquireWorkerFailed = New(codeMasterAcquireWorkerFailed, ClassDMMaster, ScopeInternal, LevelMedium, "acquire worker failed: %s") + ErrMasterInvalidOperateTaskOp = New(codeMasterInvalidOperateTaskOp, ClassDMMaster, ScopeInternal, LevelMedium, "invalid op %s on task") ErrMasterAdvertiseAddrNotValid = New(codeMasterAdvertiseAddrNotValid, ClassDMMaster, ScopeInternal, LevelHigh, "advertise address %s not valid") ErrMasterRequestIsNotForwardToLeader = New(codeMasterRequestIsNotForwardToLeader, ClassDMMaster, ScopeInternal, LevelHigh, "master is not leader, and can't forward request to leader") @@ -987,4 +1006,23 @@ var ( ErrSchemaTrackerCannotParseDownstreamTable = New( codeSchemaTrackerCannotParseDownstreamTable, ClassSchemaTracker, ScopeInternal, LevelHigh, "cannot parse downstream table schema of `%s`.`%s` to initialize upstream schema `%s`.`%s` in schema tracker") + + // HA scheduler + ErrSchedulerNotStarted = New(codeSchedulerNotStarted, ClassScheduler, ScopeInternal, LevelHigh, "the scheduler has not started") + ErrSchedulerStarted = New(codeSchedulerStarted, ClassScheduler, ScopeInternal, LevelMedium, "the scheduler has already started") + ErrSchedulerWorkerExist = New(codeSchedulerWorkerExist, ClassScheduler, ScopeInternal, LevelMedium, "dm-worker with name %s already exists") + ErrSchedulerWorkerNotExist = New(codeSchedulerWorkerNotExist, ClassScheduler, ScopeInternal, LevelMedium, "dm-worker with name %s not exists") + ErrSchedulerWorkerOnline = New(codeSchedulerWorkerOnline, ClassScheduler, ScopeInternal, LevelMedium, "dm-worker with name %s is still online, must shut it down first") + ErrSchedulerWorkerInvalidTrans = New(codeSchedulerWorkerInvalidTrans, ClassScheduler, ScopeInternal, LevelMedium, "invalid stage transformation for dm-worker %s, from %s to %s") + ErrSchedulerSourceCfgExist = New(codeSchedulerSourceCfgExist, ClassScheduler, ScopeInternal, LevelMedium, "source config with ID %s already exists") + ErrSchedulerSourceCfgNotExist = New(codeSchedulerSourceCfgNotExist, ClassScheduler, ScopeInternal, LevelMedium, "source config with ID %s not exists") + ErrSchedulerSourcesUnbound = New(codeSchedulerSourcesUnbound, ClassDMMaster, ScopeInternal, LevelMedium, "sources %v have not bound") + ErrSchedulerSourceOpTaskExist = New(codeSchedulerSourceOpTaskExist, ClassDMMaster, ScopeInternal, LevelMedium, "source with name % need to operate with tasks %v exist") + ErrSchedulerRelayStageInvalidUpdate = New(codeSchedulerRelayStageInvalidUpdate, ClassScheduler, ScopeInternal, LevelMedium, "invalid new expectant relay stage %s") + ErrSchedulerRelayStageSourceNotExist = New(codeSchedulerRelayStageSourceNotExist, ClassScheduler, ScopeInternal, LevelMedium, "sources %v need to update expectant relay stage not exist") + ErrSchedulerMultiTask = New(codeSchedulerMultiTask, ClassScheduler, ScopeInternal, LevelMedium, "the scheduler cannot perform multiple different tasks %v in one operation") + ErrSchedulerSubTaskExist = New(codeSchedulerSubTaskExist, ClassScheduler, ScopeInternal, LevelMedium, "subtasks with name %s for sources %v already exist") + ErrSchedulerSubTaskStageInvalidUpdate = New(codeSchedulerSubTaskStageInvalidUpdate, ClassDMMaster, ScopeInternal, LevelMedium, "invalid new expectant subtask stage %s") + ErrSchedulerSubTaskOpTaskNotExist = New(codeSchedulerSubTaskOpTaskNotExist, ClassDMMaster, ScopeInternal, LevelMedium, "subtasks with name %s need to be operate not exist") + ErrSchedulerSubTaskOpSourceNotExist = New(codeSchedulerSubTaskOpSourceNotExist, ClassDMMaster, ScopeInternal, LevelMedium, "sources %v need to be operate not exist") ) diff --git a/pkg/terror/terror.go b/pkg/terror/terror.go index 2383ad9947..267cfdfd44 100644 --- a/pkg/terror/terror.go +++ b/pkg/terror/terror.go @@ -48,6 +48,7 @@ const ( ClassDMWorker ClassDMTracer ClassSchemaTracker + ClassScheduler ) var errClass2Str = map[ErrClass]string{ @@ -66,6 +67,7 @@ var errClass2Str = map[ErrClass]string{ ClassDMWorker: "dm-worker", ClassDMTracer: "dm-tracer", ClassSchemaTracker: "schema-tracker", + ClassScheduler: "scheduler", } // String implements fmt.Stringer interface diff --git a/tests/_utils/test_prepare b/tests/_utils/test_prepare index e4b613ec14..3a6bc18d45 100644 --- a/tests/_utils/test_prepare +++ b/tests/_utils/test_prepare @@ -53,8 +53,11 @@ function dmctl_start_task_standalone() { fi run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "start-task $task_conf" \ - "\"result\": true" 2 \ - "\"source\": \"$SOURCE_ID1\"" 1 + "\"result\": true" 1 +# run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ +# "start-task $task_conf" \ +# "\"result\": true" 2 \ +# "\"source\": \"$SOURCE_ID1\"" 1 } # shortcut for start task on two DM-workers @@ -66,9 +69,13 @@ function dmctl_start_task() { fi run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "start-task $task_conf" \ - "\"result\": true" 3 \ - "\"source\"": 2 \ - "\"sources\"": 1 + "\"result\": true" 1 + +# run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ +# "start-task $task_conf" \ +# "\"result\": true" 3 \ +# "\"source\"": 2 \ +# "\"sources\"": 1 } # shortcut for stop task on two DM-workers @@ -92,10 +99,13 @@ function dmctl_resume_task() { function dmctl_operate_task() { task_name=$1 operate=$2 - run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "$operate $task_name" \ - "\"result\": true" 3 \ - "\"source\": \"$SOURCE_ID1\"" 1 \ - "\"source\": \"$SOURCE_ID2\"" 1 + "\"result\": true" 1 + +# run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ +# "$operate $task_name" \ +# "\"result\": true" 3 \ +# "\"source\": \"$SOURCE_ID1\"" 1 \ +# "\"source\": \"$SOURCE_ID2\"" 1 } diff --git a/tests/all_mode/conf/dm-task.yaml b/tests/all_mode/conf/dm-task.yaml index 4e22d0768c..43dac2ad4b 100644 --- a/tests/all_mode/conf/dm-task.yaml +++ b/tests/all_mode/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: all is-sharding: false meta-schema: "dm_meta" remove-meta: false -enable-heartbeat: true +# enable-heartbeat: true heartbeat-update-interval: 1 heartbeat-report-interval: 1 timezone: "Asia/Shanghai" diff --git a/tests/all_mode/conf/mysql1.toml b/tests/all_mode/conf/source1.toml similarity index 100% rename from tests/all_mode/conf/mysql1.toml rename to tests/all_mode/conf/source1.toml diff --git a/tests/all_mode/conf/mysql2.toml b/tests/all_mode/conf/source2.toml similarity index 100% rename from tests/all_mode/conf/mysql2.toml rename to tests/all_mode/conf/source2.toml diff --git a/tests/all_mode/run.sh b/tests/all_mode/run.sh index 83bc2ad9c2..e929318554 100755 --- a/tests/all_mode/run.sh +++ b/tests/all_mode/run.sh @@ -23,15 +23,15 @@ function run() { run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - cp $cur/conf/mysql2.toml $WORK_DIR/mysql2.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/mysql2.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + cp $cur/conf/source2.toml $WORK_DIR/source2.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/source2.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql2.toml" \ + "operate-source create $WORK_DIR/source2.toml" \ "true" 1 @@ -54,8 +54,8 @@ function run() { echo "after restart dm-worker, task should resume automatically" run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "start-task $cur/conf/dm-task.yaml" \ - "\"result\": true" 1 \ - "start sub task test: sub task test already exists" 2 + "\"result\": false" 1 \ + "subtasks with name test for sources \[mysql-replica-01 mysql-replica-02\] already exist" 1 sleep 2 # wait for task running @@ -85,8 +85,8 @@ function run() { # use sync_diff_inspector to check data now! check_sync_diff $WORK_DIR $cur/conf/diff_config.toml - check_metric $WORKER1_PORT 'dm_syncer_replication_lag{task="test"}' 3 0 1 - check_metric $WORKER2_PORT 'dm_syncer_replication_lag{task="test"}' 3 0 1 + # check_metric $WORKER1_PORT 'dm_syncer_replication_lag{task="test"}' 3 0 1 + # check_metric $WORKER2_PORT 'dm_syncer_replication_lag{task="test"}' 3 0 1 export GO_FAILPOINTS='' } diff --git a/tests/compatibility/conf/dm-task.yaml b/tests/compatibility/conf/dm-task.yaml index 08fe6877a4..e3618b621a 100644 --- a/tests/compatibility/conf/dm-task.yaml +++ b/tests/compatibility/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: all is-sharding: false meta-schema: "dm_meta" remove-meta: false -enable-heartbeat: true +# enable-heartbeat: true heartbeat-update-interval: 1 heartbeat-report-interval: 1 timezone: "Asia/Shanghai" diff --git a/tests/compatibility/conf/mysql1.toml b/tests/compatibility/conf/source1.toml similarity index 100% rename from tests/compatibility/conf/mysql1.toml rename to tests/compatibility/conf/source1.toml diff --git a/tests/compatibility/conf/mysql2.toml b/tests/compatibility/conf/source2.toml similarity index 100% rename from tests/compatibility/conf/mysql2.toml rename to tests/compatibility/conf/source2.toml diff --git a/tests/dm_syncer/conf/diff_config.toml b/tests/dm_syncer/conf/diff_config.toml index 7d904b9834..147c2b5fd9 100644 --- a/tests/dm_syncer/conf/diff_config.toml +++ b/tests/dm_syncer/conf/diff_config.toml @@ -16,25 +16,25 @@ fix-sql-file = "fix.sql" # tables need to check. [[check-tables]] -schema = "incremental_mode" +schema = "dm_syncer" tables = ["~t.*"] [[table-config]] -schema = "incremental_mode" +schema = "dm_syncer" table = "t1" [[table-config.source-tables]] instance-id = "source-1" -schema = "incremental_mode" +schema = "dm_syncer" table = "t1" [[table-config]] -schema = "incremental_mode" +schema = "dm_syncer" table = "t2" [[table-config.source-tables]] instance-id = "source-2" -schema = "incremental_mode" +schema = "dm_syncer" table = "t2" [[source-db]] diff --git a/tests/dm_syncer/conf/dm-task.yaml b/tests/dm_syncer/conf/dm-task.yaml index eb9f003856..e1e49f45c9 100644 --- a/tests/dm_syncer/conf/dm-task.yaml +++ b/tests/dm_syncer/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: "full" is-sharding: false meta-schema: "dm_meta" remove-meta: true -enable-heartbeat: true +# enable-heartbeat: true heartbeat-update-interval: 1 heartbeat-report-interval: 1 timezone: "Asia/Shanghai" @@ -36,7 +36,7 @@ mysql-instances: black-white-list: instance: - do-dbs: ["incremental_mode"] + do-dbs: ["dm_syncer"] mydumpers: global: diff --git a/tests/dm_syncer/conf/mysql1.toml b/tests/dm_syncer/conf/source1.toml similarity index 100% rename from tests/dm_syncer/conf/mysql1.toml rename to tests/dm_syncer/conf/source1.toml diff --git a/tests/dm_syncer/conf/mysql2.toml b/tests/dm_syncer/conf/source2.toml similarity index 100% rename from tests/dm_syncer/conf/mysql2.toml rename to tests/dm_syncer/conf/source2.toml diff --git a/tests/dm_syncer/data/db1.increment.sql b/tests/dm_syncer/data/db1.increment.sql index 13112bc38f..e8f75e3ea2 100644 --- a/tests/dm_syncer/data/db1.increment.sql +++ b/tests/dm_syncer/data/db1.increment.sql @@ -1,4 +1,4 @@ -use incremental_mode; +use dm_syncer; insert into t1 (id, name) values (3, 'Eddard Stark'); update t1 set name = 'Arya Stark' where id = 1; update t1 set name = 'Catelyn Stark' where name = 'catelyn'; diff --git a/tests/dm_syncer/data/db1.prepare.sql b/tests/dm_syncer/data/db1.prepare.sql index 36a29a51e0..10b34f9a23 100644 --- a/tests/dm_syncer/data/db1.prepare.sql +++ b/tests/dm_syncer/data/db1.prepare.sql @@ -1,5 +1,5 @@ -drop database if exists `incremental_mode`; -create database `incremental_mode`; -use `incremental_mode`; +drop database if exists `dm_syncer`; +create database `dm_syncer`; +use `dm_syncer`; create table t1 (id int, name varchar(20)); insert into t1 (id, name) values (1, 'arya'), (2, 'catelyn'); diff --git a/tests/dm_syncer/data/db1.prepare.user.sql b/tests/dm_syncer/data/db1.prepare.user.sql index 62d30b2525..43de3b61df 100644 --- a/tests/dm_syncer/data/db1.prepare.user.sql +++ b/tests/dm_syncer/data/db1.prepare.user.sql @@ -1,7 +1,7 @@ -drop user if exists 'dm_incremental'; +drop user if exists 'dm_syncer'; flush privileges; -create user 'dm_incremental'@'%' identified by ''; -grant all privileges on *.* to 'dm_incremental'@'%'; -revoke select, reload on *.* from 'dm_incremental'@'%'; -revoke create temporary tables, lock tables, create routine, alter routine, event, create tablespace, file, shutdown, execute, process, index on *.* from 'dm_incremental'@'%'; # privileges not supported by TiDB +create user 'dm_syncer'@'%' identified by ''; +grant all privileges on *.* to 'dm_syncer'@'%'; +revoke select, reload on *.* from 'dm_syncer'@'%'; +revoke create temporary tables, lock tables, create routine, alter routine, event, create tablespace, file, shutdown, execute, process, index on *.* from 'dm_syncer'@'%'; # privileges not supported by TiDB flush privileges; diff --git a/tests/dm_syncer/data/db2.increment.sql b/tests/dm_syncer/data/db2.increment.sql index 457960d31f..3ae3b24ae8 100644 --- a/tests/dm_syncer/data/db2.increment.sql +++ b/tests/dm_syncer/data/db2.increment.sql @@ -1,2 +1,2 @@ -use incremental_mode; +use dm_syncer; delete from t2 where name = 'Sansa'; diff --git a/tests/dm_syncer/data/db2.prepare.sql b/tests/dm_syncer/data/db2.prepare.sql index feff266165..56ed119a9e 100644 --- a/tests/dm_syncer/data/db2.prepare.sql +++ b/tests/dm_syncer/data/db2.prepare.sql @@ -1,5 +1,5 @@ -drop database if exists `incremental_mode`; -create database `incremental_mode`; -use `incremental_mode`; +drop database if exists `dm_syncer`; +create database `dm_syncer`; +use `dm_syncer`; create table t2 (id int auto_increment, name varchar(20), primary key (`id`)); insert into t2 (name) values ('Arya'), ('Bran'), ('Sansa'); diff --git a/tests/dm_syncer/data/db2.prepare.user.sql b/tests/dm_syncer/data/db2.prepare.user.sql index 62d30b2525..43de3b61df 100644 --- a/tests/dm_syncer/data/db2.prepare.user.sql +++ b/tests/dm_syncer/data/db2.prepare.user.sql @@ -1,7 +1,7 @@ -drop user if exists 'dm_incremental'; +drop user if exists 'dm_syncer'; flush privileges; -create user 'dm_incremental'@'%' identified by ''; -grant all privileges on *.* to 'dm_incremental'@'%'; -revoke select, reload on *.* from 'dm_incremental'@'%'; -revoke create temporary tables, lock tables, create routine, alter routine, event, create tablespace, file, shutdown, execute, process, index on *.* from 'dm_incremental'@'%'; # privileges not supported by TiDB +create user 'dm_syncer'@'%' identified by ''; +grant all privileges on *.* to 'dm_syncer'@'%'; +revoke select, reload on *.* from 'dm_syncer'@'%'; +revoke create temporary tables, lock tables, create routine, alter routine, event, create tablespace, file, shutdown, execute, process, index on *.* from 'dm_syncer'@'%'; # privileges not supported by TiDB flush privileges; diff --git a/tests/dm_syncer/run.sh b/tests/dm_syncer/run.sh index 214079ce1a..702fa68a63 100755 --- a/tests/dm_syncer/run.sh +++ b/tests/dm_syncer/run.sh @@ -20,15 +20,15 @@ function run() { run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - cp $cur/conf/mysql2.toml $WORK_DIR/mysql2.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/mysql2.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + cp $cur/conf/source2.toml $WORK_DIR/source2.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/source2.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql2.toml" \ + "operate-source create $WORK_DIR/source2.toml" \ "true" 1 # start a task in `full` mode diff --git a/tests/dmctl_basic/check_list/operate_mysql_worker.sh b/tests/dmctl_basic/check_list/operate_mysql_worker.sh deleted file mode 100644 index 1ae8d84261..0000000000 --- a/tests/dmctl_basic/check_list/operate_mysql_worker.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -function operate_mysql_worker_empty_arg() { - run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker" \ - "operate-worker \[flags\]" 1 -} - -function operate_mysql_worker_wrong_config_file() { - run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create not_exists_config_file" \ - "get file content error" 1 -} - -function operate_mysql_worker_while_master_down() { - task_conf=$1 - run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $task_conf" \ - "can not update task" 1 -} - -function operate_mysql_worker_stop_not_created_config() { - task_conf=$1 - run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker stop $task_conf" \ - "Stop Mysql-worker failed. worker has not been started" 1 -} - diff --git a/tests/dmctl_basic/check_list/operate_source.sh b/tests/dmctl_basic/check_list/operate_source.sh new file mode 100644 index 0000000000..a6fb4002d1 --- /dev/null +++ b/tests/dmctl_basic/check_list/operate_source.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +function operate_source_empty_arg() { + run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ + "operate-source" \ + "operate-source \[flags\]" 1 +} + +function operate_source_wrong_config_file() { + run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ + "operate-source create not_exists_config_file" \ + "get file content error" 1 +} + +function operate_source_while_master_down() { + source_conf=$1 + run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ + "operate-source create $source_conf" \ + "can not update task" 1 +} + +function operate_source_stop_not_created_config() { + source_conf=$1 + run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ + "operate-source stop $source_conf" \ + "source config with ID mysql-replica-01 not exists" 1 +} + diff --git a/tests/dmctl_basic/check_list/pause_relay.sh b/tests/dmctl_basic/check_list/pause_relay.sh index 8b3e49e5c2..aea0c7d3b1 100644 --- a/tests/dmctl_basic/check_list/pause_relay.sh +++ b/tests/dmctl_basic/check_list/pause_relay.sh @@ -21,7 +21,7 @@ function pause_relay_while_master_down() { function pause_relay_success() { run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "pause-relay -s $SOURCE_ID1 -s $SOURCE_ID2" \ - "\"result\": true" 3 + "\"result\": true" 1 } function pause_relay_fail() { diff --git a/tests/dmctl_basic/check_list/pause_task.sh b/tests/dmctl_basic/check_list/pause_task.sh index c7a1a07b72..bff54814e4 100644 --- a/tests/dmctl_basic/check_list/pause_task.sh +++ b/tests/dmctl_basic/check_list/pause_task.sh @@ -17,6 +17,6 @@ function pause_task_success() { task_name=$1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "pause-task $task_name" \ - "\"result\": true" 3 \ - "\"op\": \"Pause\"" 3 + "\"result\": true" 1 \ + "\"op\": \"Pause\"" 1 } diff --git a/tests/dmctl_basic/check_list/query_status.sh b/tests/dmctl_basic/check_list/query_status.sh index ccae34f9a4..5d105def95 100644 --- a/tests/dmctl_basic/check_list/query_status.sh +++ b/tests/dmctl_basic/check_list/query_status.sh @@ -36,8 +36,23 @@ function query_status_with_tasks() { } function query_status_stopped_relay() { - run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ + run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ + "query-status -s $SOURCE_ID1,$SOURCE_ID2" \ + "\"result\": true" 3 \ + "\"stage\": \"Paused\"" 2 +} + +function query_status_paused_tasks() { + run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "query-status -s $SOURCE_ID1,$SOURCE_ID2" \ "\"result\": true" 3 \ "\"stage\": \"Paused\"" 2 } + +function query_status_running_tasks() { + # Running is 4 (including relay) + run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ + "query-status -s $SOURCE_ID1,$SOURCE_ID2" \ + "\"result\": true" 3 \ + "\"stage\": \"Running\"" 4 +} diff --git a/tests/dmctl_basic/check_list/resume_relay.sh b/tests/dmctl_basic/check_list/resume_relay.sh index 2a38bd106f..cf26926f39 100644 --- a/tests/dmctl_basic/check_list/resume_relay.sh +++ b/tests/dmctl_basic/check_list/resume_relay.sh @@ -45,5 +45,5 @@ function resume_relay_while_master_down() { function resume_relay_success() { run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "resume-relay -s $SOURCE_ID1 -s $SOURCE_ID2" \ - "\"result\": true" 3 + "\"result\": true" 1 } diff --git a/tests/dmctl_basic/check_list/resume_task.sh b/tests/dmctl_basic/check_list/resume_task.sh index 1881a63b33..96fe094f15 100644 --- a/tests/dmctl_basic/check_list/resume_task.sh +++ b/tests/dmctl_basic/check_list/resume_task.sh @@ -17,6 +17,6 @@ function resume_task_success() { task_name=$1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "resume-task $task_name" \ - "\"result\": true" 3 \ - "\"op\": \"Resume\"" 3 + "\"result\": true" 1 \ + "\"op\": \"Resume\"" 1 } diff --git a/tests/dmctl_basic/conf/mysql1.toml b/tests/dmctl_basic/conf/source1.toml similarity index 100% rename from tests/dmctl_basic/conf/mysql1.toml rename to tests/dmctl_basic/conf/source1.toml diff --git a/tests/dmctl_basic/conf/mysql2.toml b/tests/dmctl_basic/conf/source2.toml similarity index 100% rename from tests/dmctl_basic/conf/mysql2.toml rename to tests/dmctl_basic/conf/source2.toml diff --git a/tests/dmctl_basic/run.sh b/tests/dmctl_basic/run.sh index 0ef87b940e..7731589842 100755 --- a/tests/dmctl_basic/run.sh +++ b/tests/dmctl_basic/run.sh @@ -7,7 +7,7 @@ source $cur/../_utils/test_prepare WORK_DIR=$TEST_DIR/$TEST_NAME TASK_CONF=$cur/conf/dm-task.yaml TASK_NAME="test" -MYSQL1_CONF=$cur/conf/mysql1.toml +MYSQL1_CONF=$cur/conf/source1.toml SQL_RESULT_FILE="$TEST_DIR/sql_res.$TEST_NAME.txt" # used to coverage wrong usage of dmctl command @@ -58,10 +58,10 @@ function usage_and_arg_test() { update_relay_should_specify_one_dm_worker $MYSQL1_CONF update_relay_while_master_down $MYSQL1_CONF - echo "update_task_wrong_arg" - update_task_wrong_arg - update_task_wrong_config_file - update_task_while_master_down $TASK_CONF + # echo "update_task_wrong_arg" + # update_task_wrong_arg + # update_task_wrong_config_file + # update_task_while_master_down $TASK_CONF echo "update_master_config_wrong_arg" update_master_config_wrong_arg @@ -74,9 +74,10 @@ function usage_and_arg_test() { purge_relay_filename_with_multi_workers purge_relay_while_master_down - operate_mysql_worker_empty_arg - operate_mysql_worker_wrong_config_file - operate_mysql_worker_while_master_down $MYSQL1_CONF + echo "operate_source_empty_arg" + operate_source_empty_arg + operate_source_wrong_config_file + operate_source_while_master_down $MYSQL1_CONF } function recover_max_binlog_size() { @@ -122,34 +123,35 @@ function run() { run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $dm_worker2_conf check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT - operate_mysql_worker_stop_not_created_config $MYSQL1_CONF + operate_source_stop_not_created_config $MYSQL1_CONF # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - cp $cur/conf/mysql2.toml $WORK_DIR/mysql2.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/mysql2.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + cp $cur/conf/source2.toml $WORK_DIR/source2.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/source2.toml # operate with invalid op type run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker invalid $WORK_DIR/mysql1.toml" \ + "operate-source invalid $WORK_DIR/source1.toml" \ "invalid operate" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql2.toml" \ + "operate-source create $WORK_DIR/source2.toml" \ "true" 1 echo "pause_relay_success" pause_relay_success query_status_stopped_relay - pause_relay_fail + # pause twice won't receive an error now + # pause_relay_fail resume_relay_success query_status_with_no_tasks - echo "dmctl_start_task" + echo "dmctl_check_task" check_task_pass $TASK_CONF check_task_not_pass $cur/conf/dm-task2.yaml @@ -158,25 +160,27 @@ function run() { check_sync_diff $WORK_DIR $cur/conf/diff_config.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "query-status -w 127.0.0.1:$WORKER1_PORT,127.0.0.1:$WORKER2_PORT" - update_task_not_paused $TASK_CONF + # update_task_not_paused $TASK_CONF echo "show_ddl_locks_no_locks" show_ddl_locks_no_locks $TASK_NAME query_status_with_tasks pause_task_success $TASK_NAME + query_status_paused_tasks - echo "update_task_worker_not_found" - update_task_worker_not_found $TASK_CONF 127.0.0.1:9999 - update_task_success_single_worker $TASK_CONF $SOURCE_ID1 - update_task_success $TASK_CONF + # echo "update_task_worker_not_found" + # update_task_worker_not_found $TASK_CONF 127.0.0.1:9999 + # update_task_success_single_worker $TASK_CONF $SOURCE_ID1 + # update_task_success $TASK_CONF run_sql_file $cur/data/db1.increment.sql $MYSQL_HOST1 $MYSQL_PORT1 run_sql_file $cur/data/db2.increment.sql $MYSQL_HOST2 $MYSQL_PORT2 resume_task_success $TASK_NAME + query_status_running_tasks check_sync_diff $WORK_DIR $cur/conf/diff_config.toml 20 - update_relay_success $cur/conf/mysql1.toml $SOURCE_ID1 - update_relay_success $cur/conf/mysql2.toml $SOURCE_ID2 + update_relay_success $cur/conf/source1.toml $SOURCE_ID1 + update_relay_success $cur/conf/source2.toml $SOURCE_ID2 # check worker config backup file is correct [ -f $WORK_DIR/worker1/dm-worker-config.bak ] && cmp $WORK_DIR/worker1/dm-worker-config.bak $cur/conf/dm-worker1.toml [ -f $WORK_DIR/worker2/dm-worker-config.bak ] && cmp $WORK_DIR/worker2/dm-worker-config.bak $cur/conf/dm-worker2.toml diff --git a/tests/dmctl_command/conf/dm-task.yaml b/tests/dmctl_command/conf/dm-task.yaml index 6f62f01dfb..f448d7ac6b 100644 --- a/tests/dmctl_command/conf/dm-task.yaml +++ b/tests/dmctl_command/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: all is-sharding: false meta-schema: "dm_meta" remove-meta: false -enable-heartbeat: true +# enable-heartbeat: true heartbeat-update-interval: 1 heartbeat-report-interval: 1 timezone: "Asia/Shanghai" diff --git a/tests/dmctl_command/conf/mysql1.toml b/tests/dmctl_command/conf/source1.toml similarity index 100% rename from tests/dmctl_command/conf/mysql1.toml rename to tests/dmctl_command/conf/source1.toml diff --git a/tests/dmctl_command/conf/mysql2.toml b/tests/dmctl_command/conf/source2.toml similarity index 100% rename from tests/dmctl_command/conf/mysql2.toml rename to tests/dmctl_command/conf/source2.toml diff --git a/tests/dmctl_command/run.sh b/tests/dmctl_command/run.sh index dccc0d77ba..0c7399ff3f 100644 --- a/tests/dmctl_command/run.sh +++ b/tests/dmctl_command/run.sh @@ -88,15 +88,15 @@ function run() { run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - cp $cur/conf/mysql2.toml $WORK_DIR/mysql2.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/mysql2.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + cp $cur/conf/source2.toml $WORK_DIR/source2.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/source2.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql2.toml" \ + "operate-source create $WORK_DIR/source2.toml" \ "true" 1 # start DM task with command mode diff --git a/tests/full_mode/conf/dm-task.yaml b/tests/full_mode/conf/dm-task.yaml index 63c621a195..370e405f39 100644 --- a/tests/full_mode/conf/dm-task.yaml +++ b/tests/full_mode/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: full is-sharding: false meta-schema: "dm_meta" remove-meta: false -enable-heartbeat: true +# enable-heartbeat: true heartbeat-update-interval: 1 heartbeat-report-interval: 1 timezone: "Asia/Shanghai" diff --git a/tests/full_mode/conf/mysql1.toml b/tests/full_mode/conf/source1.toml similarity index 100% rename from tests/full_mode/conf/mysql1.toml rename to tests/full_mode/conf/source1.toml diff --git a/tests/full_mode/conf/mysql2.toml b/tests/full_mode/conf/source2.toml similarity index 100% rename from tests/full_mode/conf/mysql2.toml rename to tests/full_mode/conf/source2.toml diff --git a/tests/full_mode/run.sh b/tests/full_mode/run.sh index 44bcbe1f4b..1ffdbff571 100755 --- a/tests/full_mode/run.sh +++ b/tests/full_mode/run.sh @@ -24,15 +24,15 @@ function run() { run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - cp $cur/conf/mysql2.toml $WORK_DIR/mysql2.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/mysql2.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + cp $cur/conf/source2.toml $WORK_DIR/source2.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/source2.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql2.toml" \ + "operate-source create $WORK_DIR/source2.toml" \ "true" 1 # start DM task only diff --git a/tests/ha/conf/dm-task.yaml b/tests/ha/conf/dm-task.yaml index ba26babbdd..079ad2c1f9 100644 --- a/tests/ha/conf/dm-task.yaml +++ b/tests/ha/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: all is-sharding: false meta-schema: "dm_meta" remove-meta: false -enable-heartbeat: true +# enable-heartbeat: true heartbeat-update-interval: 1 heartbeat-report-interval: 1 timezone: "Asia/Shanghai" diff --git a/tests/ha/conf/mysql1.toml b/tests/ha/conf/source1.toml similarity index 76% rename from tests/ha/conf/mysql1.toml rename to tests/ha/conf/source1.toml index 02b3a121b9..fc589f60e8 100644 --- a/tests/ha/conf/mysql1.toml +++ b/tests/ha/conf/source1.toml @@ -3,9 +3,9 @@ source-id = "mysql-replica-01" flavor = "" enable-gtid = false -relay-binlog-name = "" -relay-binlog-gtid = "" -enable-relay = true +# relay-binlog-name = "" +# relay-binlog-gtid = "" +enable-relay = false [from] host = "127.0.0.1" diff --git a/tests/ha/conf/mysql2.toml b/tests/ha/conf/source2.toml similarity index 80% rename from tests/ha/conf/mysql2.toml rename to tests/ha/conf/source2.toml index a1c6ecc780..d9f491f317 100644 --- a/tests/ha/conf/mysql2.toml +++ b/tests/ha/conf/source2.toml @@ -3,9 +3,9 @@ source-id = "mysql-replica-02" flavor = "" enable-gtid = false -relay-binlog-name = "" -relay-binlog-gtid = "" -enable-relay = true +# relay-binlog-name = "" +# relay-binlog-gtid = "" +enable-relay = false [from] host = "127.0.0.1" diff --git a/tests/ha/data/db1.increment2.sql b/tests/ha/data/db1.increment2.sql new file mode 100644 index 0000000000..5d38815ae3 --- /dev/null +++ b/tests/ha/data/db1.increment2.sql @@ -0,0 +1,2 @@ +use ha_test; +insert into t1 (id, name, info, lat) values (9, 'gentest9', '{"id":129}', '129.129') diff --git a/tests/ha/data/db2.increment2.sql b/tests/ha/data/db2.increment2.sql new file mode 100644 index 0000000000..97f19840e6 --- /dev/null +++ b/tests/ha/data/db2.increment2.sql @@ -0,0 +1,2 @@ +use ha_test; +insert into t2 (name) values ('Arya22'), ('Bran22'), ('Sansa22z'); diff --git a/tests/ha/run.sh b/tests/ha/run.sh index 3c51f0da5e..37cffc7df0 100755 --- a/tests/ha/run.sh +++ b/tests/ha/run.sh @@ -26,15 +26,15 @@ function run() { run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT echo "operate mysql config to worker" - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - cp $cur/conf/mysql2.toml $WORK_DIR/mysql2.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/mysql2.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + cp $cur/conf/source2.toml $WORK_DIR/source2.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/source2.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT1" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT1" \ - "operate-worker create $WORK_DIR/mysql2.toml" \ + "operate-source create $WORK_DIR/source2.toml" \ "true" 1 @@ -48,6 +48,14 @@ function run() { run_sql "flush logs;" $MYSQL_PORT1 run_sql "flush logs;" $MYSQL_PORT2 + echo "apply increment data before restart dm-worker to ensure entering increment phase" + run_sql_file $cur/data/db1.increment.sql $MYSQL_HOST1 $MYSQL_PORT1 + run_sql_file $cur/data/db2.increment.sql $MYSQL_HOST2 $MYSQL_PORT2 + + echo "use sync_diff_inspector to check increment data" + check_sync_diff $WORK_DIR $cur/conf/diff_config.toml + sleep 2 + echo "start dm-worker3 and kill dm-worker2" ps aux | grep dm-worker2 |awk '{print $2}'|xargs kill || true check_port_offline $WORKER2_PORT 20 @@ -59,25 +67,25 @@ function run() { sleep 8 echo "wait and check task running" check_http_alive 127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test '"name":"test","stage":"Running"' 10 - + echo "query-status from all dm-master" run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT1" \ "query-status test" \ - "\"stage\": \"Running\"" 4 + "\"stage\": \"Running\"" 2 run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT2" \ "query-status test" \ - "\"stage\": \"Running\"" 4 - + "\"stage\": \"Running\"" 2 + run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT3" \ "query-status test" \ - "\"stage\": \"Running\"" 4 + "\"stage\": \"Running\"" 2 - run_sql_file $cur/data/db1.increment.sql $MYSQL_HOST1 $MYSQL_PORT1 - run_sql_file $cur/data/db2.increment.sql $MYSQL_HOST2 $MYSQL_PORT2 + run_sql_file $cur/data/db1.increment2.sql $MYSQL_HOST1 $MYSQL_PORT1 + run_sql_file $cur/data/db2.increment2.sql $MYSQL_HOST2 $MYSQL_PORT2 sleep 2 - echo "use sync_diff_inspector to check data now!" + echo "use sync_diff_inspector to check increment2 data now!" check_sync_diff $WORK_DIR $cur/conf/diff_config.toml } diff --git a/tests/http_apis/conf/dm-mysql.toml b/tests/http_apis/conf/dm-mysql.toml deleted file mode 100644 index b7a093c862..0000000000 --- a/tests/http_apis/conf/dm-mysql.toml +++ /dev/null @@ -1,14 +0,0 @@ -# Worker Configuration. - -source-id = "mysql-replica-01" -server-id = 1 -flavor = "mysql" -enable-gtid = false -relay-binlog-name = "" -relay-binlog-gtid = "" - -[from] -host = "172.16.5.40" -user = "root" -password = "" -port = 10097 \ No newline at end of file diff --git a/tests/http_apis/conf/dm-task.yaml b/tests/http_apis/conf/dm-task.yaml index 4b1c90f579..a9bcacb878 100644 --- a/tests/http_apis/conf/dm-task.yaml +++ b/tests/http_apis/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: all is-sharding: false meta-schema: "dm_meta" remove-meta: false -enable-heartbeat: true +# enable-heartbeat: true timezone: "Asia/Shanghai" target-database: diff --git a/tests/http_apis/conf/mysql1.toml b/tests/http_apis/conf/source1.toml similarity index 100% rename from tests/http_apis/conf/mysql1.toml rename to tests/http_apis/conf/source1.toml diff --git a/tests/http_apis/run.sh b/tests/http_apis/run.sh index 7a9df9dd6f..99ea27cb42 100644 --- a/tests/http_apis/run.sh +++ b/tests/http_apis/run.sh @@ -26,10 +26,10 @@ function run() { run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 echo "start task and check stage" @@ -38,11 +38,12 @@ function run() { rm $WORK_DIR/task.yaml.bak echo $task_data - check_http_alive 127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test-task "task test-task has no workers or not exist" 3 + check_http_alive 127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test-task "task test-task has no source or not exist" 3 curl -X POST 127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/tasks -d '{"task": "'"$task_data"'"}' > $WORK_DIR/start-task.log check_log_contains $WORK_DIR/start-task.log "\"result\":true" 1 + sleep 1 curl -X GET 127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test > $WORK_DIR/status.log check_log_contains $WORK_DIR/status.log "\"stage\":\"Running\"" 1 check_log_contains $WORK_DIR/status.log "\"name\":\"test\"" 1 @@ -51,6 +52,7 @@ function run() { curl -X PUT 127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/tasks/test -d '{ "op": 2 }' > $WORK_DIR/pause.log check_log_contains $WORK_DIR/pause.log "\"op\":\"Pause\"" 1 + sleep 1 curl -X GET 127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test > $WORK_DIR/status.log check_log_contains $WORK_DIR/status.log "\"stage\":\"Paused\"" 1 check_log_contains $WORK_DIR/status.log "\"name\":\"test\"" 1 @@ -59,6 +61,7 @@ function run() { curl -X PUT 127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/tasks/test -d '{ "op": 3 }' > $WORK_DIR/resume.log check_log_contains $WORK_DIR/resume.log "\"op\":\"Resume\"" 1 + sleep 1 curl -X GET 127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test > $WORK_DIR/status.log check_log_contains $WORK_DIR/status.log "\"stage\":\"Running\"" 1 check_log_contains $WORK_DIR/status.log "\"name\":\"test\"" 1 diff --git a/tests/import_goroutine_leak/conf/dm-task.yaml b/tests/import_goroutine_leak/conf/dm-task.yaml index fe57809826..10246d9f52 100644 --- a/tests/import_goroutine_leak/conf/dm-task.yaml +++ b/tests/import_goroutine_leak/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: full is-sharding: false meta-schema: "dm_meta" remove-meta: false -enable-heartbeat: true +# enable-heartbeat: true timezone: "Asia/Shanghai" target-database: diff --git a/tests/import_goroutine_leak/conf/mysql1.toml b/tests/import_goroutine_leak/conf/source1.toml similarity index 100% rename from tests/import_goroutine_leak/conf/mysql1.toml rename to tests/import_goroutine_leak/conf/source1.toml diff --git a/tests/import_goroutine_leak/conf/mysql2.toml b/tests/import_goroutine_leak/conf/source2.toml similarity index 100% rename from tests/import_goroutine_leak/conf/mysql2.toml rename to tests/import_goroutine_leak/conf/source2.toml diff --git a/tests/import_goroutine_leak/run.sh b/tests/import_goroutine_leak/run.sh index 9de5ad8b2a..55a64d8487 100644 --- a/tests/import_goroutine_leak/run.sh +++ b/tests/import_goroutine_leak/run.sh @@ -43,15 +43,15 @@ function run() { run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - cp $cur/conf/mysql2.toml $WORK_DIR/mysql2.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/mysql2.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + cp $cur/conf/source2.toml $WORK_DIR/source2.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/source2.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql2.toml" \ + "operate-source create $WORK_DIR/source2.toml" \ "true" 1 dmctl_start_task diff --git a/tests/incremental_mode/conf/dm-task.yaml b/tests/incremental_mode/conf/dm-task.yaml index 30724e4cfd..410851e2ee 100644 --- a/tests/incremental_mode/conf/dm-task.yaml +++ b/tests/incremental_mode/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: task-mode-placeholder is-sharding: false meta-schema: "dm_meta" remove-meta: true -enable-heartbeat: true +# enable-heartbeat: true heartbeat-update-interval: 1 heartbeat-report-interval: 1 timezone: "Asia/Shanghai" diff --git a/tests/incremental_mode/conf/mysql1.toml b/tests/incremental_mode/conf/source1.toml similarity index 100% rename from tests/incremental_mode/conf/mysql1.toml rename to tests/incremental_mode/conf/source1.toml diff --git a/tests/incremental_mode/conf/mysql2.toml b/tests/incremental_mode/conf/source2.toml similarity index 100% rename from tests/incremental_mode/conf/mysql2.toml rename to tests/incremental_mode/conf/source2.toml diff --git a/tests/incremental_mode/run.sh b/tests/incremental_mode/run.sh index ae196e94d3..7f0de7e6d6 100755 --- a/tests/incremental_mode/run.sh +++ b/tests/incremental_mode/run.sh @@ -20,15 +20,15 @@ function run() { run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - cp $cur/conf/mysql2.toml $WORK_DIR/mysql2.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/mysql2.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + cp $cur/conf/source2.toml $WORK_DIR/source2.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/source2.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql2.toml" \ + "operate-source create $WORK_DIR/source2.toml" \ "true" 1 # start a task in `full` mode @@ -55,14 +55,14 @@ function run() { check_count 'Query OK, 0 rows affected' 7 # update mysql config - sed -i "s/root/dm_incremental/g" $WORK_DIR/mysql1.toml - sed -i "s/root/dm_incremental/g" $WORK_DIR/mysql2.toml + sed -i "s/root/dm_incremental/g" $WORK_DIR/source1.toml + sed -i "s/root/dm_incremental/g" $WORK_DIR/source2.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker update $WORK_DIR/mysql1.toml" \ - "true" 1 + "operate-source update $WORK_DIR/source1.toml" \ + "Update worker config is not supported by dm-ha now" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker update $WORK_DIR/mysql2.toml" \ - "true" 1 + "operate-source update $WORK_DIR/source2.toml" \ + "Update worker config is not supported by dm-ha now" 1 run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $WORK_DIR/dm-worker1.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT diff --git a/tests/initial_unit/conf/dm-task.yaml b/tests/initial_unit/conf/dm-task.yaml index f4515f9870..4ee9241c15 100644 --- a/tests/initial_unit/conf/dm-task.yaml +++ b/tests/initial_unit/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: all is-sharding: false meta-schema: "dm_meta" remove-meta: false -enable-heartbeat: true +# enable-heartbeat: true timezone: "Asia/Shanghai" target-database: diff --git a/tests/initial_unit/conf/mysql1.toml b/tests/initial_unit/conf/source1.toml similarity index 100% rename from tests/initial_unit/conf/mysql1.toml rename to tests/initial_unit/conf/source1.toml diff --git a/tests/initial_unit/run.sh b/tests/initial_unit/run.sh index a51eeb0a5f..257d51508f 100644 --- a/tests/initial_unit/run.sh +++ b/tests/initial_unit/run.sh @@ -36,18 +36,19 @@ function run() { run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 echo "start task and query status, the sync unit will initial failed" task_conf="$cur/conf/dm-task.yaml" run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "start-task $task_conf" \ - "\"result\": true" 2 + "\"result\": true" 1 + sleep 2 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "query-status test" \ "\"stage\": \"Paused\"" 1 \ @@ -57,8 +58,9 @@ function run() { echo "resume task will also initial failed" run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "resume-task test" \ - "\"result\": true" 2 + "\"result\": true" 1 + sleep 2 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "query-status test" \ "\"stage\": \"Paused\"" 1 \ @@ -79,16 +81,18 @@ function run() { task_conf="$cur/conf/dm-task.yaml" run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "start-task $task_conf" \ - "\"result\": true" 1 \ "\"result\": false" 1 \ - "start sub task test: sub task test already exists" 1 + "subtasks with name test for sources \[mysql-replica-01\] already exist" 1 check_sync_diff $WORK_DIR $cur/conf/diff_config.toml + # resume-task takes no effect run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "resume-task test" \ - "\"result\": true" 1 \ - "\"result\": false" 1 \ - "current stage is not paused not valid" 1 + "\"result\": true" 1 + + run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ + "query-status test" \ + "\"stage\": \"Running\"" 2 cleanup_process run_sql "drop database if exists initial_unit" $TIDB_PORT diff --git a/tests/load_interrupt/conf/dm-task.yaml b/tests/load_interrupt/conf/dm-task.yaml index e33175801a..1bb2c2e76c 100644 --- a/tests/load_interrupt/conf/dm-task.yaml +++ b/tests/load_interrupt/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: full is-sharding: false meta-schema: "dm_meta" remove-meta: false -enable-heartbeat: true +# enable-heartbeat: true timezone: "Asia/Shanghai" target-database: diff --git a/tests/load_interrupt/conf/mysql1.toml b/tests/load_interrupt/conf/source1.toml similarity index 100% rename from tests/load_interrupt/conf/mysql1.toml rename to tests/load_interrupt/conf/source1.toml diff --git a/tests/load_interrupt/conf/mysql2.toml b/tests/load_interrupt/conf/source2.toml similarity index 100% rename from tests/load_interrupt/conf/mysql2.toml rename to tests/load_interrupt/conf/source2.toml diff --git a/tests/load_interrupt/run.sh b/tests/load_interrupt/run.sh index 736aa0dee6..4fb0c09cde 100755 --- a/tests/load_interrupt/run.sh +++ b/tests/load_interrupt/run.sh @@ -49,15 +49,15 @@ function run() { run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - cp $cur/conf/mysql2.toml $WORK_DIR/mysql2.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/mysql2.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + cp $cur/conf/source2.toml $WORK_DIR/source2.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/source2.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql2.toml" \ + "operate-source create $WORK_DIR/source2.toml" \ "true" 1 dmctl_start_task diff --git a/tests/online_ddl/conf/dm-task.yaml b/tests/online_ddl/conf/dm-task.yaml index 4957af3ebb..efcc24c185 100644 --- a/tests/online_ddl/conf/dm-task.yaml +++ b/tests/online_ddl/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: all is-sharding: true meta-schema: "dm_meta" remove-meta: true -disable-heartbeat: true +enable-heartbeat: false timezone: "Asia/Shanghai" online-ddl-scheme: online-ddl-scheme-placeholder diff --git a/tests/online_ddl/conf/mysql1.toml b/tests/online_ddl/conf/source1.toml similarity index 100% rename from tests/online_ddl/conf/mysql1.toml rename to tests/online_ddl/conf/source1.toml diff --git a/tests/online_ddl/conf/mysql2.toml b/tests/online_ddl/conf/source2.toml similarity index 100% rename from tests/online_ddl/conf/mysql2.toml rename to tests/online_ddl/conf/source2.toml diff --git a/tests/online_ddl/run.sh b/tests/online_ddl/run.sh index 5a08458544..7627127d0c 100755 --- a/tests/online_ddl/run.sh +++ b/tests/online_ddl/run.sh @@ -21,15 +21,15 @@ function real_run() { run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - cp $cur/conf/mysql2.toml $WORK_DIR/mysql2.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/mysql2.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + cp $cur/conf/source2.toml $WORK_DIR/source2.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/source2.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql2.toml" \ + "operate-source create $WORK_DIR/source2.toml" \ "true" 1 # start DM task only @@ -61,9 +61,10 @@ function run() { echo "[$(date)] <<<<<< test case $TEST_NAME success! >>>>>>" } -if [ "$ONLINE_DDL_ENABLE" == true ]; then - run gh-ost - run pt -else - echo "[$(date)] <<<<<< skip online ddl test! >>>>>>" -fi +#if [ "$ONLINE_DDL_ENABLE" == true ]; then +# run gh-ost +# run pt +#else +# make online_ddl abort +echo "[$(date)] <<<<<< skip online ddl test! >>>>>>" +#fi diff --git a/tests/others_integration.txt b/tests/others_integration.txt index 60207a43af..132bcba47d 100644 --- a/tests/others_integration.txt +++ b/tests/others_integration.txt @@ -1,5 +1,4 @@ ha full_mode -retry_cancel start_task dm_syncer diff --git a/tests/print_status/conf/mysql1.toml b/tests/print_status/conf/source1.toml similarity index 100% rename from tests/print_status/conf/mysql1.toml rename to tests/print_status/conf/source1.toml diff --git a/tests/print_status/run.sh b/tests/print_status/run.sh index 21027424f0..02f106c1fc 100755 --- a/tests/print_status/run.sh +++ b/tests/print_status/run.sh @@ -25,10 +25,10 @@ function run() { run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 # start DM task only diff --git a/tests/relay_interrupt/conf/dm-task.yaml b/tests/relay_interrupt/conf/dm-task.yaml index 8291577e02..cba1e665e2 100644 --- a/tests/relay_interrupt/conf/dm-task.yaml +++ b/tests/relay_interrupt/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: all is-sharding: false meta-schema: "dm_meta" remove-meta: false -enable-heartbeat: true +# enable-heartbeat: true timezone: "Asia/Shanghai" target-database: diff --git a/tests/relay_interrupt/conf/mysql1.toml b/tests/relay_interrupt/conf/source1.toml similarity index 100% rename from tests/relay_interrupt/conf/mysql1.toml rename to tests/relay_interrupt/conf/source1.toml diff --git a/tests/relay_interrupt/run.sh b/tests/relay_interrupt/run.sh index 618a232504..24a54d110f 100644 --- a/tests/relay_interrupt/run.sh +++ b/tests/relay_interrupt/run.sh @@ -40,10 +40,10 @@ function run() { run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 echo "query status, relay log failed" @@ -52,12 +52,18 @@ function run() { "no sub task started" 1 \ "ERROR" 1 - echo "start task and query status, task have error message" + echo "start task and query status, task and relay have error message" task_conf="$cur/conf/dm-task.yaml" run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "start-task $task_conf" \ - "\"result\": true" 2 + "\"result\": true" 1 + echo "waiting for asynchronous relay and subtask to be started" + sleep 2 + run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ + "query-status -s $SOURCE_ID1" \ + "database driver error: ERROR 1152" 1 \ + "ERROR" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "query-status -s $SOURCE_ID1" \ "no valid relay sub directory exists" 1 \ @@ -80,28 +86,28 @@ function run() { task_conf="$cur/conf/dm-task.yaml" run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "start-task $task_conf" \ - "\"result\": true" 1 \ "\"result\": false" 1 \ - "start sub task test: sub task test already exists" 1 + "subtasks with name test for sources \[mysql-replica-01\] already exist" 1 - run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "query-status test" \ - "\"binlogType\": \"local\"" 1 +# TODO(csuzhangxc): support relay log again. +# run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ +# "query-status test" \ +# "\"binlogType\": \"local\"" 1 +# +# check_sync_diff $WORK_DIR $cur/conf/diff_config.toml - check_sync_diff $WORK_DIR $cur/conf/diff_config.toml - - prepare_data2 - echo "read binlog from relay log failed, and will use remote binlog" +# prepare_data2 +# echo "read binlog from relay log failed, and will use remote binlog" kill_dm_worker export GO_FAILPOINTS="github.com/pingcap/dm/pkg/streamer/GetEventFromLocalFailed=return()" run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT sleep 8 - run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "query-status test" \ - "\"binlogType\": \"remote\"" 1 - - check_sync_diff $WORK_DIR $cur/conf/diff_config.toml +# run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ +# "query-status test" \ +# "\"binlogType\": \"remote\"" 1 +# +# check_sync_diff $WORK_DIR $cur/conf/diff_config.toml export GO_FAILPOINTS='' cleanup_process diff --git a/tests/retry_cancel/conf/dm-task.yaml b/tests/retry_cancel/conf/dm-task.yaml index 525cc0c25d..27e460a93c 100644 --- a/tests/retry_cancel/conf/dm-task.yaml +++ b/tests/retry_cancel/conf/dm-task.yaml @@ -1,7 +1,7 @@ --- name: test task-mode: all -enable-heartbeat: true +# enable-heartbeat: true timezone: "Asia/Shanghai" target-database: diff --git a/tests/retry_cancel/conf/mysql1.toml b/tests/retry_cancel/conf/source1.toml similarity index 100% rename from tests/retry_cancel/conf/mysql1.toml rename to tests/retry_cancel/conf/source1.toml diff --git a/tests/retry_cancel/conf/mysql2.toml b/tests/retry_cancel/conf/source2.toml similarity index 100% rename from tests/retry_cancel/conf/mysql2.toml rename to tests/retry_cancel/conf/source2.toml diff --git a/tests/retry_cancel/run.sh b/tests/retry_cancel/run.sh index 7fbdb2725a..d736dcb493 100755 --- a/tests/retry_cancel/run.sh +++ b/tests/retry_cancel/run.sh @@ -1,4 +1,5 @@ #!/bin/bash +# TODO: this case can't run under new HA model, already remove from `other_integratin.txt`, add it back when supported. set -eu @@ -22,15 +23,15 @@ function run() { run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - cp $cur/conf/mysql2.toml $WORK_DIR/mysql2.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/mysql2.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + cp $cur/conf/source2.toml $WORK_DIR/source2.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/source2.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql2.toml" \ + "operate-source create $WORK_DIR/source2.toml" \ "true" 1 # start-task with retry_cancel enabled diff --git a/tests/safe_mode/conf/dm-task.yaml b/tests/safe_mode/conf/dm-task.yaml index 93495c3fd0..f1ad2ae36f 100644 --- a/tests/safe_mode/conf/dm-task.yaml +++ b/tests/safe_mode/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: all is-sharding: true meta-schema: "dm_meta" remove-meta: false -disable-heartbeat: true +enable-heartbeat: false timezone: "Asia/Shanghai" target-database: diff --git a/tests/safe_mode/conf/mysql1.toml b/tests/safe_mode/conf/source1.toml similarity index 100% rename from tests/safe_mode/conf/mysql1.toml rename to tests/safe_mode/conf/source1.toml diff --git a/tests/safe_mode/conf/mysql2.toml b/tests/safe_mode/conf/source2.toml similarity index 100% rename from tests/safe_mode/conf/mysql2.toml rename to tests/safe_mode/conf/source2.toml diff --git a/tests/safe_mode/run.sh b/tests/safe_mode/run.sh index 030c05c9ef..e1230d029d 100755 --- a/tests/safe_mode/run.sh +++ b/tests/safe_mode/run.sh @@ -20,15 +20,15 @@ function run() { run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - cp $cur/conf/mysql2.toml $WORK_DIR/mysql2.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/mysql2.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + cp $cur/conf/source2.toml $WORK_DIR/source2.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/source2.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql2.toml" \ + "operate-source create $WORK_DIR/source2.toml" \ "true" 1 dmctl_start_task diff --git a/tests/sequence_safe_mode/conf/dm-task.yaml b/tests/sequence_safe_mode/conf/dm-task.yaml index 1a31a7cb92..c403fbcd39 100644 --- a/tests/sequence_safe_mode/conf/dm-task.yaml +++ b/tests/sequence_safe_mode/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: all is-sharding: true meta-schema: "dm_meta" remove-meta: false -disable-heartbeat: true +enable-heartbeat: false timezone: "Asia/Shanghai" target-database: diff --git a/tests/sequence_safe_mode/conf/mysql1.toml b/tests/sequence_safe_mode/conf/source1.toml similarity index 100% rename from tests/sequence_safe_mode/conf/mysql1.toml rename to tests/sequence_safe_mode/conf/source1.toml diff --git a/tests/sequence_safe_mode/conf/mysql2.toml b/tests/sequence_safe_mode/conf/source2.toml similarity index 100% rename from tests/sequence_safe_mode/conf/mysql2.toml rename to tests/sequence_safe_mode/conf/source2.toml diff --git a/tests/sequence_safe_mode/run.sh b/tests/sequence_safe_mode/run.sh index 9acba91ccc..530c378a6a 100755 --- a/tests/sequence_safe_mode/run.sh +++ b/tests/sequence_safe_mode/run.sh @@ -20,15 +20,15 @@ function run() { run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - cp $cur/conf/mysql2.toml $WORK_DIR/mysql2.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/mysql2.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + cp $cur/conf/source2.toml $WORK_DIR/source2.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/source2.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql2.toml" \ + "operate-source create $WORK_DIR/source2.toml" \ "true" 1 dmctl_start_task diff --git a/tests/sequence_sharding/conf/dm-task.yaml b/tests/sequence_sharding/conf/dm-task.yaml index c809bf532a..b4ec188ebe 100644 --- a/tests/sequence_sharding/conf/dm-task.yaml +++ b/tests/sequence_sharding/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: all is-sharding: true meta-schema: "dm_meta" remove-meta: false -enable-heartbeat: true +# enable-heartbeat: true timezone: "Asia/Shanghai" target-database: diff --git a/tests/sequence_sharding/conf/mysql1.toml b/tests/sequence_sharding/conf/source1.toml similarity index 100% rename from tests/sequence_sharding/conf/mysql1.toml rename to tests/sequence_sharding/conf/source1.toml diff --git a/tests/sequence_sharding/conf/mysql2.toml b/tests/sequence_sharding/conf/source2.toml similarity index 100% rename from tests/sequence_sharding/conf/mysql2.toml rename to tests/sequence_sharding/conf/source2.toml diff --git a/tests/sequence_sharding/run.sh b/tests/sequence_sharding/run.sh index 32e25bb24e..65310ba1ea 100755 --- a/tests/sequence_sharding/run.sh +++ b/tests/sequence_sharding/run.sh @@ -17,15 +17,15 @@ function run() { run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - cp $cur/conf/mysql2.toml $WORK_DIR/mysql2.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/mysql2.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + cp $cur/conf/source2.toml $WORK_DIR/source2.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/source2.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql2.toml" \ + "operate-source create $WORK_DIR/source2.toml" \ "true" 1 # start DM task only diff --git a/tests/sharding/conf/dm-task.yaml b/tests/sharding/conf/dm-task.yaml index fd2a7ec7ad..5a3cede805 100644 --- a/tests/sharding/conf/dm-task.yaml +++ b/tests/sharding/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: all is-sharding: true meta-schema: "dm_meta" remove-meta: false -enable-heartbeat: true +# enable-heartbeat: true timezone: "Asia/Shanghai" target-database: diff --git a/tests/sharding/conf/mysql1.toml b/tests/sharding/conf/source1.toml similarity index 100% rename from tests/sharding/conf/mysql1.toml rename to tests/sharding/conf/source1.toml diff --git a/tests/sharding/conf/mysql2.toml b/tests/sharding/conf/source2.toml similarity index 100% rename from tests/sharding/conf/mysql2.toml rename to tests/sharding/conf/source2.toml diff --git a/tests/sharding/run.sh b/tests/sharding/run.sh index b50e034064..94f6596eb4 100755 --- a/tests/sharding/run.sh +++ b/tests/sharding/run.sh @@ -29,15 +29,15 @@ function run() { run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - cp $cur/conf/mysql2.toml $WORK_DIR/mysql2.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/mysql2.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + cp $cur/conf/source2.toml $WORK_DIR/source2.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker2/relay_log\"" $WORK_DIR/source2.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql2.toml" \ + "operate-source create $WORK_DIR/source2.toml" \ "true" 1 # start DM task only diff --git a/tests/start_task/conf/dm-task.yaml b/tests/start_task/conf/dm-task.yaml index a51b9087e2..a23f1d44c6 100644 --- a/tests/start_task/conf/dm-task.yaml +++ b/tests/start_task/conf/dm-task.yaml @@ -4,7 +4,7 @@ task-mode: all is-sharding: false meta-schema: "dm_meta" remove-meta: false -enable-heartbeat: true +# enable-heartbeat: true timezone: "Asia/Shanghai" target-database: diff --git a/tests/start_task/conf/mysql1.toml b/tests/start_task/conf/source1.toml similarity index 100% rename from tests/start_task/conf/mysql1.toml rename to tests/start_task/conf/source1.toml diff --git a/tests/start_task/run.sh b/tests/start_task/run.sh index 97e043b634..d7eba5f423 100644 --- a/tests/start_task/run.sh +++ b/tests/start_task/run.sh @@ -36,10 +36,10 @@ function run() { run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT # operate mysql config to worker - cp $cur/conf/mysql1.toml $WORK_DIR/mysql1.toml - sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/mysql1.toml + cp $cur/conf/source1.toml $WORK_DIR/source1.toml + sed -i "/relay-binlog-name/i\relay-dir = \"$WORK_DIR/worker1/relay_log\"" $WORK_DIR/source1.toml run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ - "operate-worker create $WORK_DIR/mysql1.toml" \ + "operate-source create $WORK_DIR/source1.toml" \ "true" 1 echo "check un-accessible DM-worker exists" @@ -61,10 +61,11 @@ function run() { run_dm_master $WORK_DIR/master $MASTER_PORT $cur/conf/dm-master.toml check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT + sleep 5 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ "start-task $task_conf" \ - "\"result\": true" 2 + "\"result\": true" 1 check_sync_diff $WORK_DIR $cur/conf/diff_config.toml