From 9893157fd5f530252b5cd7acff69f9e95065f421 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Sat, 2 Nov 2019 19:11:22 +0800 Subject: [PATCH 01/27] *: combine etcd server, gRPC API server and other HTTP servers --- cmd/dm-master/main.go | 30 ++-- dm/master/etcd.go | 114 +++++++++++++++ dm/master/{status.go => http_handler.go} | 37 ++++- .../{status_test.go => http_handler_test.go} | 0 dm/master/server.go | 135 +++++++----------- go.mod | 2 +- 6 files changed, 222 insertions(+), 96 deletions(-) create mode 100644 dm/master/etcd.go rename dm/master/{status.go => http_handler.go} (52%) rename dm/master/{status_test.go => http_handler_test.go} (100%) diff --git a/cmd/dm-master/main.go b/cmd/dm-master/main.go index 601e25bc46..66d6e3ff6d 100644 --- a/cmd/dm-master/main.go +++ b/cmd/dm-master/main.go @@ -14,6 +14,7 @@ package main import ( + "context" "flag" "fmt" "os" @@ -30,6 +31,7 @@ import ( ) func main() { + // 1. parse config cfg := master.NewConfig() err := cfg.Parse(os.Args[1:]) switch errors.Cause(err) { @@ -41,6 +43,7 @@ func main() { os.Exit(2) } + // 2. init logger err = log.InitLogger(&log.Config{ File: cfg.LogFile, Level: strings.ToLower(cfg.LogLevel), @@ -50,39 +53,42 @@ func main() { os.Exit(2) } + // 3. print process version information utils.PrintInfo("dm-master", func() { log.L().Info("", zap.Stringer("dm-master config", cfg)) }) + // 4. start the server + ctx, cancel := context.WithCancel(context.Background()) + server := master.NewServer(cfg) + err = server.Start(ctx) + if err != nil { + log.L().Error("fail to start dm-master", zap.Error(err)) + os.Exit(2) + } + + // 5. wait for stopping the process sc := make(chan os.Signal, 1) signal.Notify(sc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) - - server := master.NewServer(cfg) - go func() { sig := <-sc log.L().Info("got signal to exit", zap.Stringer("signal", sig)) - server.Close() + cancel() }() + <-ctx.Done() - err = server.Start() - if err != nil { - log.L().Error("fail to start dm-master", zap.Error(err)) - } + // 6. close the server server.Close() - log.L().Info("dm-master exit") + // 7. flush log syncErr := log.L().Sync() if syncErr != nil { fmt.Fprintln(os.Stderr, "sync log failed", syncErr) - } - - if err != nil || syncErr != nil { os.Exit(1) } } diff --git a/dm/master/etcd.go b/dm/master/etcd.go new file mode 100644 index 0000000000..75790ce60b --- /dev/null +++ b/dm/master/etcd.go @@ -0,0 +1,114 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package master + +import ( + "fmt" + "net/http" + "net/url" + "strings" + "time" + + "github.com/pingcap/errors" + "go.etcd.io/etcd/embed" + "google.golang.org/grpc" +) + +const ( + etcdDataDir = "dm.etcd" + defaultLpUrl = "http://127.0.0.1:8269" // TODO: make it as config item. + defaultClusterKey = "dm-master" // TODO: adjust in config +) + +// startEtcd starts an embedded etcd server. +// reuse +func startEtcd(lcUrl string, lpUrl string, + gRPCSvr func(*grpc.Server), + httpHandles map[string]http.Handler) (*embed.Etcd, error) { + cfg := embed.NewConfig() + cfg.Dir = etcdDataDir + + // we only use one client listening URL now + lcUrl2, err := parseUrls(lcUrl) + if err != nil { + return nil, err + } + cfg.LCUrls = lcUrl2 + cfg.ACUrls = lcUrl2 + + //if lpUrl == "" { + // lpUrl = defaultLpUrl + //} + //lpUrl2, err := parseUrls(lpUrl) + //if err != nil { + // return nil, err + //} + //cfg.LPUrls = lpUrl2 + //cfg.APUrls = lpUrl2 + // + //cfg.InitialCluster = initialClusterFromLpUrls(lpUrl) + + // attach extra gRPC and HTTP server + if gRPCSvr != nil { + cfg.ServiceRegister = gRPCSvr + } + if httpHandles != nil { + cfg.UserHandlers = httpHandles + } + + e, err := embed.StartEtcd(cfg) + if err != nil { + return nil, err + } + + select { + case <-e.Server.ReadyNotify(): + case <-time.After(time.Minute): + e.Server.Stop() + e.Close() + return nil, errors.New("start etcd server timeout") + } + return e, nil +} + +// parseUrls parse a string into multiple urls. +// if the URL in the string without protocol scheme, use `http` as the default. +// if no IP exists in the address, `0.0.0.0` is used. +func parseUrls(s string) ([]url.URL, error) { + items := strings.Split(s, ",") + urls := make([]url.URL, 0, len(items)) + for _, item := range items { + u, err := url.Parse(item) + if err != nil && strings.Contains(err.Error(), "missing protocol scheme") { + u, err = url.Parse("http://" + item) + } + if err != nil { + return nil, err + } + if strings.Index(u.Host, ":") == 0 { + u.Host = "0.0.0.0" + u.Host + } + urls = append(urls, *u) + } + return urls, nil +} + +// initialClusterFromLpUrls gets `--initial-cluster` from lp URLs. +func initialClusterFromLpUrls(lpUrls string) string { + items := strings.Split(lpUrls, ",") + for i, item := range items { + items[i] = fmt.Sprintf("%s=%s", defaultClusterKey, item) + } + return strings.Join(items, ",") +} diff --git a/dm/master/status.go b/dm/master/http_handler.go similarity index 52% rename from dm/master/status.go rename to dm/master/http_handler.go index 24a144bc81..234d5b3b30 100644 --- a/dm/master/status.go +++ b/dm/master/http_handler.go @@ -14,14 +14,21 @@ package master import ( + "context" "net/http" "net/http/pprof" + "github.com/grpc-ecosystem/grpc-gateway/runtime" + "google.golang.org/grpc" + "github.com/pingcap/dm/dm/common" + "github.com/pingcap/dm/dm/pb" "github.com/pingcap/dm/pkg/log" + "github.com/pingcap/dm/pkg/terror" "github.com/pingcap/dm/pkg/utils" ) +// statusHandler handles process status. type statusHandler struct { } @@ -34,12 +41,36 @@ func (h *statusHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { } } -// HandleStatus handles functions for getting status by HTTP request -func HandleStatus(mux *http.ServeMux) { - mux.Handle("/status", &statusHandler{}) +// getStatusHandle returns a HTTP handler to handle process status. +func getStatusHandle() http.Handler { + return &statusHandler{} +} + +// getHTTPAPIHandler returns a HTTP handler to handle DM-master APIs. +func getHTTPAPIHandler(ctx context.Context, addr string) (http.Handler, error) { + // dial the real API server in non-blocking mode, it may not started yet. + opts := []grpc.DialOption{grpc.WithInsecure()} + // NOTE: should we need to replace `host` in `addr` to `127.0.0.1`? + conn, err := grpc.DialContext(ctx, addr, opts...) + if err != nil { + return nil, terror.ErrMasterHandleHTTPApis.Delegate(err) + } + + gwmux := runtime.NewServeMux() + err = pb.RegisterMasterHandler(ctx, gwmux, conn) + if err != nil { + return nil, terror.ErrMasterHandleHTTPApis.Delegate(err) + } + return gwmux, nil +} + +// getDebugHandler returns a HTTP handler to handle debug information. +func getDebugHandler() http.Handler { + mux := http.NewServeMux() mux.HandleFunc("/debug/pprof/", pprof.Index) mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) mux.HandleFunc("/debug/pprof/profile", pprof.Profile) mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) mux.HandleFunc("/debug/pprof/trace", pprof.Trace) + return mux } diff --git a/dm/master/status_test.go b/dm/master/http_handler_test.go similarity index 100% rename from dm/master/status_test.go rename to dm/master/http_handler_test.go diff --git a/dm/master/server.go b/dm/master/server.go index d928c9f298..4ae1b43500 100644 --- a/dm/master/server.go +++ b/dm/master/server.go @@ -27,12 +27,11 @@ import ( "github.com/grpc-ecosystem/grpc-gateway/runtime" "github.com/pingcap/errors" "github.com/siddontang/go/sync2" - "github.com/soheilhy/cmux" + "go.etcd.io/etcd/embed" "go.uber.org/zap" "google.golang.org/grpc" "github.com/pingcap/dm/checker" - "github.com/pingcap/dm/dm/common" "github.com/pingcap/dm/dm/config" "github.com/pingcap/dm/dm/master/sql-operator" "github.com/pingcap/dm/dm/master/workerrpc" @@ -45,7 +44,6 @@ import ( var ( fetchDDLInfoRetryTimeout = 5 * time.Second - cmuxReadTimeout = 10 * time.Second ) // Server handles RPC requests for dm-master @@ -54,8 +52,11 @@ type Server struct { cfg *Config - rootLis net.Listener - svr *grpc.Server + // the embed etcd server, and the gRPC/HTTP API server also attached to it. + etcd *embed.Etcd + + // WaitGroup for background functions. + bgFunWg sync.WaitGroup // dm-worker-ID(host:ip) -> dm-worker client management workerClients map[string]workerrpc.Client @@ -95,43 +96,61 @@ func NewServer(cfg *Config) *Server { } // Start starts to serving -func (s *Server) Start() error { - var err error - - _, _, err = s.splitHostPort() +func (s *Server) Start(ctx context.Context) error { + // TODO: check config in config.go? + _, _, err := s.splitHostPort() if err != nil { return err } - s.rootLis, err = net.Listen("tcp", s.cfg.MasterAddr) - if err != nil { - return terror.ErrMasterStartService.Delegate(err) - } - + // create clients to DM-workers for _, workerAddr := range s.cfg.DeployMap { s.workerClients[workerAddr], err = workerrpc.NewGRPCClient(workerAddr) if err != nil { return err } } - s.closed.Set(false) - ctx, cancel := context.WithCancel(context.Background()) - var wg sync.WaitGroup - defer func() { - cancel() - wg.Wait() - }() + // get an HTTP to gRPC API handler. + apiHandler, err := getHTTPAPIHandler(ctx, s.cfg.MasterAddr) + if err != nil { + return err + } + + // HTTP handlers on etcd's client IP:port + // no `metrics` for DM-master now, add it later. + // NOTE: after received any HTTP request from chrome browser, + // the server may be blocked when closing sometime. + // And any request to etcd's builtin handler has the same problem. + // And curl or safari browser does trigger this problem. + // But I haven't figured it out. + // (maybe more requests are sent from chrome or its extensions). + userHandles := map[string]http.Handler{ + "/apis/": apiHandler, + "/status": getStatusHandle(), + "/debug/": getDebugHandler(), + } + + // gRPC API server + gRPCSvr := func(gs *grpc.Server) { pb.RegisterMasterServer(gs, s) } + + // start embed etcd server, gRPC API server and HTTP (API, status and debug) server. + s.etcd, err = startEtcd(s.cfg.MasterAddr, "", gRPCSvr, userHandles) + if err != nil { + return err + } - wg.Add(1) + s.closed.Set(false) // the server started now. + + s.bgFunWg.Add(1) go func() { - defer wg.Done() + defer s.bgFunWg.Done() s.ap.Start(ctx) }() - wg.Add(1) + s.bgFunWg.Add(1) go func() { - defer wg.Done() + defer s.bgFunWg.Done() select { case <-ctx.Done(): return @@ -141,59 +160,15 @@ func (s *Server) Start() error { } }() - wg.Add(1) + s.bgFunWg.Add(1) go func() { - defer wg.Done() + defer s.bgFunWg.Done() // fetch DDL info from dm-workers to sync sharding DDL s.fetchWorkerDDLInfo(ctx) }() - // create a cmux - m := cmux.New(s.rootLis) - m.SetReadTimeout(cmuxReadTimeout) // set a timeout, ref: https://github.com/pingcap/tidb-binlog/pull/352 - - // match connections in order: first gRPC, then HTTP - grpcL := m.MatchWithWriters(cmux.HTTP2MatchHeaderFieldSendSettings("content-type", "application/grpc")) - httpL := m.Match(cmux.HTTP1Fast()) - - s.svr = grpc.NewServer() - pb.RegisterMasterServer(s.svr, s) - go func() { - err2 := s.svr.Serve(grpcL) - if err2 != nil && !common.IsErrNetClosing(err2) && err2 != cmux.ErrListenerClosed { - log.L().Error("fail to start gRPC server", zap.Error(err2)) - } - }() - - httpmux := http.NewServeMux() - HandleStatus(httpmux) // serve status - - err = s.HandleHTTPApis(ctx, httpmux) // server http api - if err != nil { - return err - } - - wg.Add(1) - go func() { - defer wg.Done() - - httpS := &http.Server{ - Handler: httpmux, - } - - err3 := httpS.Serve(httpL) - if err3 != nil && !common.IsErrNetClosing(err3) && err3 != http.ErrServerClosed { - log.L().Error("run http server", log.ShortError(err3)) - } - }() - log.L().Info("listening gRPC API and status request", zap.String("address", s.cfg.MasterAddr)) - err = m.Serve() // start serving, block - if err != nil && common.IsErrNetClosing(err) { - err = nil - } - - return err + return nil } // Close close the RPC server, this function can be called multiple times @@ -203,14 +178,14 @@ func (s *Server) Close() { if s.closed.Get() { return } - if s.rootLis != nil { - err := s.rootLis.Close() - if err != nil && !common.IsErrNetClosing(err) { - log.L().Error("close net listener", zap.Error(err)) - } - } - if s.svr != nil { - s.svr.GracefulStop() + log.L().Info("closing server") + + // wait for background functions returned + s.bgFunWg.Wait() + + // close the etcd and other attached servers + if s.etcd != nil { + s.etcd.Close() } s.closed.Set(true) } diff --git a/go.mod b/go.mod index f837bbff3b..d734b235fa 100644 --- a/go.mod +++ b/go.mod @@ -60,7 +60,7 @@ require ( github.com/uber/jaeger-client-go v2.16.0+incompatible // indirect github.com/uber/jaeger-lib v2.0.0+incompatible // indirect github.com/unrolled/render v1.0.1 // indirect - go.etcd.io/etcd v3.3.15+incompatible // indirect + go.etcd.io/etcd v3.3.15+incompatible go.uber.org/atomic v1.4.0 // indirect go.uber.org/zap v1.10.0 golang.org/x/crypto v0.0.0-20191002192127-34f69633bfdc // indirect From 025a3a8eb8b1a68e04bccee98dfac5fbff6fad08 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Mon, 4 Nov 2019 13:16:41 +0800 Subject: [PATCH 02/27] *: fix tests; refine config item --- _utils/terror_gen/errors_release.txt | 3 + dm/master/config.go | 109 ++++++++++++++++++++++++++- dm/master/etcd.go | 70 ++--------------- dm/master/http_handler_test.go | 20 ++--- dm/master/server.go | 2 +- dm/master/server_test.go | 22 +++--- pkg/terror/error_list.go | 76 ++++++++++--------- 7 files changed, 179 insertions(+), 123 deletions(-) diff --git a/_utils/terror_gen/errors_release.txt b/_utils/terror_gen/errors_release.txt index ac8601f824..d0e842ce31 100644 --- a/_utils/terror_gen/errors_release.txt +++ b/_utils/terror_gen/errors_release.txt @@ -312,6 +312,9 @@ ErrMasterOperRespNotSuccess,[code=38032:class=dm-master:scope=internal:level=hig ErrMasterOperRequestTimeout,[code=38033:class=dm-master:scope=internal:level=high],"request is timeout, but request may be successful, please execute `query-status` to check status" ErrMasterHandleHTTPApis,[code=38034:class=dm-master:scope=internal:level=high],"serve http apis to grpc" ErrMasterHostPortNotValid,[code=38035:class=dm-master:scope=internal:level=high],"host:port '%s' not valid" +ErrMasterGetHostnameFail,[code=38036:class=dm-master:scope=internal:level=high],"get hostname fail" +ErrMasterGenEmbedEtcdConfigFail,[code=38037:class=dm-master:scope=internal:level=high],"generate config item %s for embed etcd fail" +ErrMasterStartEmbedEtcdFail,[code=38038:class=dm-master:scope=internal:level=high],"start embed etcd fail" ErrWorkerParseFlagSet,[code=40001:class=dm-worker:scope=internal:level=medium],"parse dm-worker config flag set" ErrWorkerInvalidFlag,[code=40002:class=dm-worker:scope=internal:level=medium],"'%s' is an invalid flag" ErrWorkerDecodeConfigFromFile,[code=40003:class=dm-worker:scope=internal:level=medium],"toml decode file" diff --git a/dm/master/config.go b/dm/master/config.go index c082076ca4..dd71a64e31 100644 --- a/dm/master/config.go +++ b/dm/master/config.go @@ -19,15 +19,24 @@ import ( "flag" "fmt" "io/ioutil" + "net/url" + "os" "strings" "time" + "github.com/BurntSushi/toml" + "go.etcd.io/etcd/embed" + "go.uber.org/zap" + "github.com/pingcap/dm/pkg/log" "github.com/pingcap/dm/pkg/terror" "github.com/pingcap/dm/pkg/utils" +) - "github.com/BurntSushi/toml" - "go.uber.org/zap" +const ( + defaultRPCTimeout = "30s" + defaultNamePrefix = "dm-master" + defaultPeerUrls = "http://127.0.0.1:8269" ) // SampleConfigFile is sample config file of dm-master @@ -35,8 +44,6 @@ import ( // and assign it to SampleConfigFile while we build dm-master var SampleConfigFile string -var defaultRPCTimeout = "30s" - // NewConfig creates a config for dm-master func NewConfig() *Config { cfg := &Config{} @@ -51,6 +58,12 @@ func NewConfig() *Config { fs.StringVar(&cfg.LogFile, "log-file", "", "log file path") //fs.StringVar(&cfg.LogRotate, "log-rotate", "day", "log file rotate type, hour/day") + fs.StringVar(&cfg.Name, "name", "", "human-readable name for this DM-master member") + fs.StringVar(&cfg.DataDir, "data-dir", "", "path to the data directory (default 'default.${name}')") + fs.StringVar(&cfg.InitialCluster, "initial-cluster", "", fmt.Sprintf("initial cluster configuration for bootstrapping, e,g. dm-master=%s", defaultPeerUrls)) + fs.StringVar(&cfg.PeerUrls, "peer-urls", defaultPeerUrls, "URLs for peer traffic") + fs.StringVar(&cfg.AdvertisePeerUrls, "advertise-peer-urls", "", "advertise URLs for peer traffic (default '${peer-urls}')") + return cfg } @@ -90,6 +103,15 @@ type Config struct { ConfigFile string `json:"config-file"` + // etcd relative config items + // NOTE: we use `MasterAddr` to generate `ClientUrls` and `AdvertiseClientUrls` + // NOTE: more items will be add when adding leader election + Name string `toml:"name" json:"name"` + DataDir string `toml:"data-dir" json:"data-dir"` + PeerUrls string `toml:"peer-urls" json:"peer-urls"` + AdvertisePeerUrls string `toml:"advertise-peer-urls" json:"advertise-peer-urls"` + InitialCluster string `toml:"initial-cluster" json:"initial-cluster"` + printVersion bool printSampleConfig bool } @@ -202,6 +224,34 @@ func (c *Config) adjust() error { c.RPCRateBurst = DefaultBurst } + if c.Name == "" { + hostname, err := os.Hostname() + if err != nil { + return terror.ErrMasterGetHostnameFail.Delegate(err) + } + c.Name = fmt.Sprintf("%s-%s", defaultNamePrefix, hostname) + } + + if c.DataDir == "" { + c.DataDir = fmt.Sprintf("default.%s", c.Name) + } + + if c.PeerUrls == "" { + c.PeerUrls = defaultPeerUrls + } + + if c.AdvertisePeerUrls == "" { + c.AdvertisePeerUrls = defaultPeerUrls + } + + if c.InitialCluster == "" { + items := strings.Split(c.AdvertisePeerUrls, ",") + for i, item := range items { + items[i] = fmt.Sprintf("%s=%s", c.Name, item) + } + c.InitialCluster = strings.Join(items, ",") + } + return nil } @@ -228,3 +278,54 @@ func (c *Config) Reload() error { return c.adjust() } + +// genEmbedEtcdConfig generates the configuration needed by embed etcd. +func (c *Config) genEmbedEtcdConfig() (*embed.Config, error) { + cfg := embed.NewConfig() + cfg.Name = c.Name + cfg.Dir = c.DataDir + + // reuse the previous master-addr as the client listening URL. + cURL, err := parseUrls(c.MasterAddr) + if err != nil { + return nil, terror.ErrMasterGenEmbedEtcdConfigFail.Delegate(err, "--listen-client-urls/--advertise-client-urls") + } + cfg.LCUrls = cURL + cfg.ACUrls = cURL + + cfg.LPUrls, err = parseUrls(c.PeerUrls) + if err != nil { + return nil, terror.ErrMasterGenEmbedEtcdConfigFail.Delegate(err, "--listen-peer-urls") + } + + cfg.APUrls, err = parseUrls(c.AdvertisePeerUrls) + if err != nil { + return nil, terror.ErrMasterGenEmbedEtcdConfigFail.Delegate(err, "--initial-advertise-peer-urls") + } + + cfg.InitialCluster = c.InitialCluster + + return cfg, nil +} + +// parseUrls parse a string into multiple urls. +// if the URL in the string without protocol scheme, use `http` as the default. +// if no IP exists in the address, `0.0.0.0` is used. +func parseUrls(s string) ([]url.URL, error) { + items := strings.Split(s, ",") + urls := make([]url.URL, 0, len(items)) + for _, item := range items { + u, err := url.Parse(item) + if err != nil && strings.Contains(err.Error(), "missing protocol scheme") { + u, err = url.Parse("http://" + item) + } + if err != nil { + return nil, err + } + if strings.Index(u.Host, ":") == 0 { + u.Host = "0.0.0.0" + u.Host + } + urls = append(urls, *u) + } + return urls, nil +} diff --git a/dm/master/etcd.go b/dm/master/etcd.go index 75790ce60b..c9b37b7d02 100644 --- a/dm/master/etcd.go +++ b/dm/master/etcd.go @@ -14,50 +14,24 @@ package master import ( - "fmt" "net/http" - "net/url" - "strings" "time" "github.com/pingcap/errors" "go.etcd.io/etcd/embed" "google.golang.org/grpc" -) -const ( - etcdDataDir = "dm.etcd" - defaultLpUrl = "http://127.0.0.1:8269" // TODO: make it as config item. - defaultClusterKey = "dm-master" // TODO: adjust in config + "github.com/pingcap/dm/pkg/terror" ) // startEtcd starts an embedded etcd server. -// reuse -func startEtcd(lcUrl string, lpUrl string, +func startEtcd(masterCfg *Config, gRPCSvr func(*grpc.Server), httpHandles map[string]http.Handler) (*embed.Etcd, error) { - cfg := embed.NewConfig() - cfg.Dir = etcdDataDir - - // we only use one client listening URL now - lcUrl2, err := parseUrls(lcUrl) + cfg, err := masterCfg.genEmbedEtcdConfig() if err != nil { return nil, err } - cfg.LCUrls = lcUrl2 - cfg.ACUrls = lcUrl2 - - //if lpUrl == "" { - // lpUrl = defaultLpUrl - //} - //lpUrl2, err := parseUrls(lpUrl) - //if err != nil { - // return nil, err - //} - //cfg.LPUrls = lpUrl2 - //cfg.APUrls = lpUrl2 - // - //cfg.InitialCluster = initialClusterFromLpUrls(lpUrl) // attach extra gRPC and HTTP server if gRPCSvr != nil { @@ -69,46 +43,16 @@ func startEtcd(lcUrl string, lpUrl string, e, err := embed.StartEtcd(cfg) if err != nil { - return nil, err + return nil, terror.ErrMasterStartEmbedEtcdFail.Delegate(err) } + timeout := time.Minute select { case <-e.Server.ReadyNotify(): - case <-time.After(time.Minute): + case <-time.After(timeout): e.Server.Stop() e.Close() - return nil, errors.New("start etcd server timeout") + return nil, terror.ErrMasterStartEmbedEtcdFail.Delegate(errors.Errorf("start embed etcd timeout %v", timeout)) } return e, nil } - -// parseUrls parse a string into multiple urls. -// if the URL in the string without protocol scheme, use `http` as the default. -// if no IP exists in the address, `0.0.0.0` is used. -func parseUrls(s string) ([]url.URL, error) { - items := strings.Split(s, ",") - urls := make([]url.URL, 0, len(items)) - for _, item := range items { - u, err := url.Parse(item) - if err != nil && strings.Contains(err.Error(), "missing protocol scheme") { - u, err = url.Parse("http://" + item) - } - if err != nil { - return nil, err - } - if strings.Index(u.Host, ":") == 0 { - u.Host = "0.0.0.0" + u.Host - } - urls = append(urls, *u) - } - return urls, nil -} - -// initialClusterFromLpUrls gets `--initial-cluster` from lp URLs. -func initialClusterFromLpUrls(lpUrls string) string { - items := strings.Split(lpUrls, ",") - for i, item := range items { - items[i] = fmt.Sprintf("%s=%s", defaultClusterKey, item) - } - return strings.Join(items, ",") -} diff --git a/dm/master/http_handler_test.go b/dm/master/http_handler_test.go index b3b5300b78..904f7261b6 100644 --- a/dm/master/http_handler_test.go +++ b/dm/master/http_handler_test.go @@ -14,6 +14,7 @@ package master import ( + "context" "fmt" "io/ioutil" "net/http" @@ -30,19 +31,16 @@ type testHTTPServer struct { cfg *Config } -func (t *testHTTPServer) startServer(c *check.C) { +func (t *testHTTPServer) startServer(ctx context.Context, c *check.C) { t.cfg = NewConfig() t.cfg.MasterAddr = ":8261" t.cfg.RPCRateLimit = DefaultRate t.cfg.RPCRateBurst = DefaultBurst + t.cfg.DataDir = c.MkDir() + c.Assert(t.cfg.adjust(), check.IsNil) t.server = NewServer(t.cfg) - go func() { - err := t.server.Start() - c.Assert(err, check.IsNil) - }() - - err := t.waitUntilServerOnline() + err := t.server.Start(ctx) c.Assert(err, check.IsNil) } @@ -69,8 +67,12 @@ func (t *testHTTPServer) waitUntilServerOnline() error { } func (t *testHTTPServer) TestStatus(c *check.C) { - t.startServer(c) - defer t.stopServer(c) + ctx, cancel := context.WithCancel(context.Background()) + t.startServer(ctx, c) + defer func() { + cancel() + t.stopServer(c) + }() statusURL := fmt.Sprintf("http://127.0.0.1%s/status", t.cfg.MasterAddr) resp, err := http.Get(statusURL) diff --git a/dm/master/server.go b/dm/master/server.go index 4ae1b43500..a9a8d326af 100644 --- a/dm/master/server.go +++ b/dm/master/server.go @@ -135,7 +135,7 @@ func (s *Server) Start(ctx context.Context) error { gRPCSvr := func(gs *grpc.Server) { pb.RegisterMasterServer(gs, s) } // start embed etcd server, gRPC API server and HTTP (API, status and debug) server. - s.etcd, err = startEtcd(s.cfg.MasterAddr, "", gRPCSvr, userHandles) + s.etcd, err = startEtcd(s.cfg, gRPCSvr, userHandles) if err != nil { return err } diff --git a/dm/master/server_test.go b/dm/master/server_test.go index 3c335001c1..9ca95c3f6d 100644 --- a/dm/master/server_test.go +++ b/dm/master/server_test.go @@ -153,6 +153,7 @@ func testDefaultMasterServer(c *check.C) *Server { cfg := NewConfig() err := cfg.Parse([]string{"-config=./dm-master.toml"}) c.Assert(err, check.IsNil) + cfg.DataDir = c.MkDir() server := NewServer(cfg) go server.ap.Start(context.Background()) @@ -1486,33 +1487,32 @@ func (t *testMaster) TestFetchWorkerDDLInfo(c *check.C) { func (t *testMaster) TestServer(c *check.C) { cfg := NewConfig() c.Assert(cfg.Parse([]string{"-config=./dm-master.toml"}), check.IsNil) + cfg.DataDir = c.MkDir() s := NewServer(cfg) masterAddr := cfg.MasterAddr s.cfg.MasterAddr = "" - err := s.Start() + ctx1, cancel1 := context.WithCancel(context.Background()) + err := s.Start(ctx1) c.Assert(terror.ErrMasterHostPortNotValid.Equal(err), check.IsTrue) + cancel1() s.Close() s.cfg.MasterAddr = masterAddr - go func() { - err1 := s.Start() - c.Assert(err1, check.IsNil) - }() - - c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { - return !s.closed.Get() - }), check.IsTrue) + ctx2, cancel2 := context.WithCancel(context.Background()) + err1 := s.Start(ctx2) + c.Assert(err1, check.IsNil) t.testHTTPInterface(c, "status") dupServer := NewServer(cfg) - err = dupServer.Start() - c.Assert(terror.ErrMasterStartService.Equal(err), check.IsTrue) + err = dupServer.Start(ctx2) + c.Assert(terror.ErrMasterStartEmbedEtcdFail.Equal(err), check.IsTrue) c.Assert(err.Error(), check.Matches, ".*bind: address already in use") // close + cancel2() s.Close() c.Assert(utils.WaitSomething(30, 10*time.Millisecond, func() bool { diff --git a/pkg/terror/error_list.go b/pkg/terror/error_list.go index c8317f3515..ff63629db4 100644 --- a/pkg/terror/error_list.go +++ b/pkg/terror/error_list.go @@ -381,6 +381,9 @@ const ( codeMasterOperRequestTimeout codeMasterHandleHTTPApis codeMasterHostPortNotValid + codeMasterGetHostnameFail + codeMasterGenEmbedEtcdConfigFail + codeMasterStartEmbedEtcdFail ) // DM-worker error code @@ -783,41 +786,44 @@ var ( ErrSyncerUnitExecWithNoBlockingDDL = New(codeSyncerUnitExecWithNoBlockingDDL, ClassSyncUnit, ScopeInternal, LevelHigh, "process unit not waiting for sharding DDL to sync") // DM-master error - ErrMasterSQLOpNilRequest = New(codeMasterSQLOpNilRequest, ClassDMMaster, ScopeInternal, LevelMedium, "nil request not valid") - ErrMasterSQLOpNotSupport = New(codeMasterSQLOpNotSupport, ClassDMMaster, ScopeInternal, LevelMedium, "op %s not supported") - ErrMasterSQLOpWithoutSharding = New(codeMasterSQLOpWithoutSharding, ClassDMMaster, ScopeInternal, LevelMedium, "operate request without --sharding specified not valid") - ErrMasterGRPCCreateConn = New(codeMasterGRPCCreateConn, ClassDMMaster, ScopeInternal, LevelHigh, "create grpc connection") - ErrMasterGRPCSendOnCloseConn = New(codeMasterGRPCSendOnCloseConn, ClassDMMaster, ScopeInternal, LevelHigh, "send request on a closed client") - ErrMasterGRPCClientClose = New(codeMasterGRPCClientClose, ClassDMMaster, ScopeInternal, LevelHigh, "close rpc client") - ErrMasterGRPCInvalidReqType = New(codeMasterGRPCInvalidReqType, ClassDMMaster, ScopeInternal, LevelHigh, "invalid request type: %v") - ErrMasterGRPCRequestError = New(codeMasterGRPCRequestError, ClassDMMaster, ScopeInternal, LevelHigh, "grpc request error") - ErrMasterDeployMapperVerify = New(codeMasterDeployMapperVerify, ClassDMMaster, ScopeInternal, LevelHigh, "user should specify valid relation between source(mysql/mariadb) and dm-worker, config %+v not valid") - ErrMasterConfigParseFlagSet = New(codeMasterConfigParseFlagSet, ClassDMMaster, ScopeInternal, LevelMedium, "parse config flag set") - ErrMasterConfigUnknownItem = New(codeMasterConfigUnknownItem, ClassDMMaster, ScopeInternal, LevelMedium, "master config contained unknown configuration options: %s") - ErrMasterConfigInvalidFlag = New(codeMasterConfigInvalidFlag, ClassDMMaster, ScopeInternal, LevelMedium, "'%s' is an invalid flag") - ErrMasterConfigTomlTransform = New(codeMasterConfigTomlTransform, ClassDMMaster, ScopeInternal, LevelMedium, "config toml transform") - ErrMasterConfigTimeoutParse = New(codeMasterConfigTimeoutParse, ClassDMMaster, ScopeInternal, LevelMedium, "parse rpc timeout str") - ErrMasterConfigUpdateCfgFile = New(codeMasterConfigUpdateCfgFile, ClassDMMaster, ScopeInternal, LevelHigh, "update config file") - ErrMasterShardingDDLDiff = New(codeMasterShardingDDLDiff, ClassDMMaster, ScopeInternal, LevelHigh, "sharding ddls in ddl lock %s is different with %s") - ErrMasterStartService = New(codeMasterStartService, ClassDMMaster, ScopeInternal, LevelHigh, "start server") - ErrMasterNoEmitToken = New(codeMasterNoEmitToken, ClassDMMaster, ScopeInternal, LevelHigh, "fail to get emit opportunity for worker %s") - ErrMasterLockNotFound = New(codeMasterLockNotFound, ClassDMMaster, ScopeInternal, LevelHigh, "lock with ID %s not found") - ErrMasterLockIsResolving = New(codeMasterLockIsResolving, ClassDMMaster, ScopeInternal, LevelHigh, "lock %s is resolving") - ErrMasterWorkerCliNotFound = New(codeMasterWorkerCliNotFound, ClassDMMaster, ScopeInternal, LevelHigh, "worker %s relevant worker-client not found") - ErrMasterWorkerNotWaitLock = New(codeMasterWorkerNotWaitLock, ClassDMMaster, ScopeInternal, LevelHigh, "worker %s not waiting for DDL lock %s") - ErrMasterHandleSQLReqFail = New(codeMasterHandleSQLReqFail, ClassDMMaster, ScopeInternal, LevelHigh, "request DDL lock %s owner %s handle SQLs request %s fail %s") - ErrMasterOwnerExecDDL = New(codeMasterOwnerExecDDL, ClassDMMaster, ScopeInternal, LevelHigh, "owner %s ExecuteDDL fail") - ErrMasterPartWorkerExecDDLFail = New(codeMasterPartWorkerExecDDLFail, ClassDMMaster, ScopeInternal, LevelHigh, "DDL lock %s owner ExecuteDDL successfully, so DDL lock removed. but some dm-workers ExecuteDDL fail, you should to handle dm-worker directly") - ErrMasterWorkerExistDDLLock = New(codeMasterWorkerExistDDLLock, ClassDMMaster, ScopeInternal, LevelHigh, "worker %s exist ddl lock, please unlock ddl lock first") - ErrMasterGetWorkerCfgExtractor = New(codeMasterGetWorkerCfgExtractor, ClassDMMaster, ScopeInternal, LevelHigh, "") - ErrMasterTaskConfigExtractor = New(codeMasterTaskConfigExtractor, ClassDMMaster, ScopeInternal, LevelHigh, "") - ErrMasterWorkerArgsExtractor = New(codeMasterWorkerArgsExtractor, ClassDMMaster, ScopeInternal, LevelHigh, "") - ErrMasterQueryWorkerConfig = New(codeMasterQueryWorkerConfig, ClassDMMaster, ScopeInternal, LevelHigh, "") - ErrMasterOperNotFound = New(codeMasterOperNotFound, ClassDMMaster, ScopeInternal, LevelHigh, "operation %d of task %s not found, please execute `query-status` to check status") - ErrMasterOperRespNotSuccess = New(codeMasterOperRespNotSuccess, ClassDMMaster, ScopeInternal, LevelHigh, "operation not success: %s") - ErrMasterOperRequestTimeout = New(codeMasterOperRequestTimeout, ClassDMMaster, ScopeInternal, LevelHigh, "request is timeout, but request may be successful, please execute `query-status` to check status") - ErrMasterHandleHTTPApis = New(codeMasterHandleHTTPApis, ClassDMMaster, ScopeInternal, LevelHigh, "serve http apis to grpc") - ErrMasterHostPortNotValid = New(codeMasterHostPortNotValid, ClassDMMaster, ScopeInternal, LevelHigh, "host:port '%s' not valid") + ErrMasterSQLOpNilRequest = New(codeMasterSQLOpNilRequest, ClassDMMaster, ScopeInternal, LevelMedium, "nil request not valid") + ErrMasterSQLOpNotSupport = New(codeMasterSQLOpNotSupport, ClassDMMaster, ScopeInternal, LevelMedium, "op %s not supported") + ErrMasterSQLOpWithoutSharding = New(codeMasterSQLOpWithoutSharding, ClassDMMaster, ScopeInternal, LevelMedium, "operate request without --sharding specified not valid") + ErrMasterGRPCCreateConn = New(codeMasterGRPCCreateConn, ClassDMMaster, ScopeInternal, LevelHigh, "create grpc connection") + ErrMasterGRPCSendOnCloseConn = New(codeMasterGRPCSendOnCloseConn, ClassDMMaster, ScopeInternal, LevelHigh, "send request on a closed client") + ErrMasterGRPCClientClose = New(codeMasterGRPCClientClose, ClassDMMaster, ScopeInternal, LevelHigh, "close rpc client") + ErrMasterGRPCInvalidReqType = New(codeMasterGRPCInvalidReqType, ClassDMMaster, ScopeInternal, LevelHigh, "invalid request type: %v") + ErrMasterGRPCRequestError = New(codeMasterGRPCRequestError, ClassDMMaster, ScopeInternal, LevelHigh, "grpc request error") + ErrMasterDeployMapperVerify = New(codeMasterDeployMapperVerify, ClassDMMaster, ScopeInternal, LevelHigh, "user should specify valid relation between source(mysql/mariadb) and dm-worker, config %+v not valid") + ErrMasterConfigParseFlagSet = New(codeMasterConfigParseFlagSet, ClassDMMaster, ScopeInternal, LevelMedium, "parse config flag set") + ErrMasterConfigUnknownItem = New(codeMasterConfigUnknownItem, ClassDMMaster, ScopeInternal, LevelMedium, "master config contained unknown configuration options: %s") + ErrMasterConfigInvalidFlag = New(codeMasterConfigInvalidFlag, ClassDMMaster, ScopeInternal, LevelMedium, "'%s' is an invalid flag") + ErrMasterConfigTomlTransform = New(codeMasterConfigTomlTransform, ClassDMMaster, ScopeInternal, LevelMedium, "config toml transform") + ErrMasterConfigTimeoutParse = New(codeMasterConfigTimeoutParse, ClassDMMaster, ScopeInternal, LevelMedium, "parse rpc timeout str") + ErrMasterConfigUpdateCfgFile = New(codeMasterConfigUpdateCfgFile, ClassDMMaster, ScopeInternal, LevelHigh, "update config file") + ErrMasterShardingDDLDiff = New(codeMasterShardingDDLDiff, ClassDMMaster, ScopeInternal, LevelHigh, "sharding ddls in ddl lock %s is different with %s") + ErrMasterStartService = New(codeMasterStartService, ClassDMMaster, ScopeInternal, LevelHigh, "start server") + ErrMasterNoEmitToken = New(codeMasterNoEmitToken, ClassDMMaster, ScopeInternal, LevelHigh, "fail to get emit opportunity for worker %s") + ErrMasterLockNotFound = New(codeMasterLockNotFound, ClassDMMaster, ScopeInternal, LevelHigh, "lock with ID %s not found") + ErrMasterLockIsResolving = New(codeMasterLockIsResolving, ClassDMMaster, ScopeInternal, LevelHigh, "lock %s is resolving") + ErrMasterWorkerCliNotFound = New(codeMasterWorkerCliNotFound, ClassDMMaster, ScopeInternal, LevelHigh, "worker %s relevant worker-client not found") + ErrMasterWorkerNotWaitLock = New(codeMasterWorkerNotWaitLock, ClassDMMaster, ScopeInternal, LevelHigh, "worker %s not waiting for DDL lock %s") + ErrMasterHandleSQLReqFail = New(codeMasterHandleSQLReqFail, ClassDMMaster, ScopeInternal, LevelHigh, "request DDL lock %s owner %s handle SQLs request %s fail %s") + ErrMasterOwnerExecDDL = New(codeMasterOwnerExecDDL, ClassDMMaster, ScopeInternal, LevelHigh, "owner %s ExecuteDDL fail") + ErrMasterPartWorkerExecDDLFail = New(codeMasterPartWorkerExecDDLFail, ClassDMMaster, ScopeInternal, LevelHigh, "DDL lock %s owner ExecuteDDL successfully, so DDL lock removed. but some dm-workers ExecuteDDL fail, you should to handle dm-worker directly") + ErrMasterWorkerExistDDLLock = New(codeMasterWorkerExistDDLLock, ClassDMMaster, ScopeInternal, LevelHigh, "worker %s exist ddl lock, please unlock ddl lock first") + ErrMasterGetWorkerCfgExtractor = New(codeMasterGetWorkerCfgExtractor, ClassDMMaster, ScopeInternal, LevelHigh, "") + ErrMasterTaskConfigExtractor = New(codeMasterTaskConfigExtractor, ClassDMMaster, ScopeInternal, LevelHigh, "") + ErrMasterWorkerArgsExtractor = New(codeMasterWorkerArgsExtractor, ClassDMMaster, ScopeInternal, LevelHigh, "") + ErrMasterQueryWorkerConfig = New(codeMasterQueryWorkerConfig, ClassDMMaster, ScopeInternal, LevelHigh, "") + ErrMasterOperNotFound = New(codeMasterOperNotFound, ClassDMMaster, ScopeInternal, LevelHigh, "operation %d of task %s not found, please execute `query-status` to check status") + ErrMasterOperRespNotSuccess = New(codeMasterOperRespNotSuccess, ClassDMMaster, ScopeInternal, LevelHigh, "operation not success: %s") + ErrMasterOperRequestTimeout = New(codeMasterOperRequestTimeout, ClassDMMaster, ScopeInternal, LevelHigh, "request is timeout, but request may be successful, please execute `query-status` to check status") + ErrMasterHandleHTTPApis = New(codeMasterHandleHTTPApis, ClassDMMaster, ScopeInternal, LevelHigh, "serve http apis to grpc") + ErrMasterHostPortNotValid = New(codeMasterHostPortNotValid, ClassDMMaster, ScopeInternal, LevelHigh, "host:port '%s' not valid") + ErrMasterGetHostnameFail = New(codeMasterGetHostnameFail, ClassDMMaster, ScopeInternal, LevelHigh, "get hostname fail") + ErrMasterGenEmbedEtcdConfigFail = New(codeMasterGenEmbedEtcdConfigFail, ClassDMMaster, ScopeInternal, LevelHigh, "generate config item %s for embed etcd fail") + ErrMasterStartEmbedEtcdFail = New(codeMasterStartEmbedEtcdFail, ClassDMMaster, ScopeInternal, LevelHigh, "start embed etcd fail") // DM-worker error ErrWorkerParseFlagSet = New(codeWorkerParseFlagSet, ClassDMWorker, ScopeInternal, LevelMedium, "parse dm-worker config flag set") From 466550b566df9e2dc4a891054efbcea25baf0c8f Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Mon, 4 Nov 2019 19:39:34 +0800 Subject: [PATCH 03/27] *: add test case for `parseURLs` --- _utils/terror_gen/errors_release.txt | 1 + dm/master/config.go | 16 +++++--- dm/master/config_test.go | 61 ++++++++++++++++++++++++++-- pkg/terror/error_list.go | 2 + 4 files changed, 70 insertions(+), 10 deletions(-) diff --git a/_utils/terror_gen/errors_release.txt b/_utils/terror_gen/errors_release.txt index d0e842ce31..e779c24322 100644 --- a/_utils/terror_gen/errors_release.txt +++ b/_utils/terror_gen/errors_release.txt @@ -315,6 +315,7 @@ ErrMasterHostPortNotValid,[code=38035:class=dm-master:scope=internal:level=high] ErrMasterGetHostnameFail,[code=38036:class=dm-master:scope=internal:level=high],"get hostname fail" ErrMasterGenEmbedEtcdConfigFail,[code=38037:class=dm-master:scope=internal:level=high],"generate config item %s for embed etcd fail" ErrMasterStartEmbedEtcdFail,[code=38038:class=dm-master:scope=internal:level=high],"start embed etcd fail" +ErrMasterParseURLFail,[code=38039:class=dm-master:scope=internal:level=high],"parse URL %s fail" ErrWorkerParseFlagSet,[code=40001:class=dm-worker:scope=internal:level=medium],"parse dm-worker config flag set" ErrWorkerInvalidFlag,[code=40002:class=dm-worker:scope=internal:level=medium],"'%s' is an invalid flag" ErrWorkerDecodeConfigFromFile,[code=40003:class=dm-worker:scope=internal:level=medium],"toml decode file" diff --git a/dm/master/config.go b/dm/master/config.go index dd71a64e31..be21f22245 100644 --- a/dm/master/config.go +++ b/dm/master/config.go @@ -286,19 +286,19 @@ func (c *Config) genEmbedEtcdConfig() (*embed.Config, error) { cfg.Dir = c.DataDir // reuse the previous master-addr as the client listening URL. - cURL, err := parseUrls(c.MasterAddr) + cURL, err := parseURLs(c.MasterAddr) if err != nil { return nil, terror.ErrMasterGenEmbedEtcdConfigFail.Delegate(err, "--listen-client-urls/--advertise-client-urls") } cfg.LCUrls = cURL cfg.ACUrls = cURL - cfg.LPUrls, err = parseUrls(c.PeerUrls) + cfg.LPUrls, err = parseURLs(c.PeerUrls) if err != nil { return nil, terror.ErrMasterGenEmbedEtcdConfigFail.Delegate(err, "--listen-peer-urls") } - cfg.APUrls, err = parseUrls(c.AdvertisePeerUrls) + cfg.APUrls, err = parseURLs(c.AdvertisePeerUrls) if err != nil { return nil, terror.ErrMasterGenEmbedEtcdConfigFail.Delegate(err, "--initial-advertise-peer-urls") } @@ -308,10 +308,14 @@ func (c *Config) genEmbedEtcdConfig() (*embed.Config, error) { return cfg, nil } -// parseUrls parse a string into multiple urls. +// parseURLs parse a string into multiple urls. // if the URL in the string without protocol scheme, use `http` as the default. // if no IP exists in the address, `0.0.0.0` is used. -func parseUrls(s string) ([]url.URL, error) { +func parseURLs(s string) ([]url.URL, error) { + if s == "" { + return nil, nil + } + items := strings.Split(s, ",") urls := make([]url.URL, 0, len(items)) for _, item := range items { @@ -320,7 +324,7 @@ func parseUrls(s string) ([]url.URL, error) { u, err = url.Parse("http://" + item) } if err != nil { - return nil, err + return nil, terror.ErrMasterParseURLFail.Delegate(err, item) } if strings.Index(u.Host, ":") == 0 { u.Host = "0.0.0.0" + u.Host diff --git a/dm/master/config_test.go b/dm/master/config_test.go index 543a74335c..beb6f86590 100644 --- a/dm/master/config_test.go +++ b/dm/master/config_test.go @@ -18,18 +18,25 @@ import ( "flag" "fmt" "io/ioutil" + "net/url" "path" "strings" capturer "github.com/kami-zh/go-capturer" "github.com/pingcap/check" + + "github.com/pingcap/dm/pkg/terror" ) var ( defaultConfigFile = "./dm-master.toml" + _ = check.Suite(&testConfigSuite{}) ) -func (t *testMaster) TestPrintSampleConfig(c *check.C) { +type testConfigSuite struct { +} + +func (t *testConfigSuite) TestPrintSampleConfig(c *check.C) { var ( buf []byte err error @@ -65,7 +72,7 @@ func (t *testMaster) TestPrintSampleConfig(c *check.C) { c.Assert(strings.TrimSpace(out), check.Matches, "base64 decode config error:.*") } -func (t *testMaster) TestConfig(c *check.C) { +func (t *testConfigSuite) TestConfig(c *check.C) { var ( err error cfg = &Config{} @@ -121,7 +128,7 @@ func (t *testMaster) TestConfig(c *check.C) { } } -func (t *testMaster) TestUpdateConfigFile(c *check.C) { +func (t *testConfigSuite) TestUpdateConfigFile(c *check.C) { var ( err error content []byte @@ -153,7 +160,7 @@ func (t *testMaster) TestUpdateConfigFile(c *check.C) { c.Assert(newContent, check.DeepEquals, content) } -func (t *testMaster) TestInvalidConfig(c *check.C) { +func (t *testConfigSuite) TestInvalidConfig(c *check.C) { var ( err error cfg = NewConfig() @@ -195,3 +202,49 @@ dm-worker = "172.16.10.72:8262"`) c.Assert(err, check.NotNil) c.Assert(err, check.ErrorMatches, "*master config contained unknown configuration options: aaa*") } +func (t *testConfigSuite) TestParseURLs(c *check.C) { + cases := []struct { + str string + urls []url.URL + hasErr bool + }{ + {}, // empty str + { + str: "http://127.0.0.1:8269", + urls: []url.URL{{Scheme: "http", Host: "127.0.0.1:8269"}}, + }, + { + str: "http://127.0.0.1:8269,http://127.0.0.1:18269", + urls: []url.URL{ + {Scheme: "http", Host: "127.0.0.1:8269"}, + {Scheme: "http", Host: "127.0.0.1:18269"}, + }, + }, + { + str: "127.0.0.1:8269", // no scheme, but url.Parse can't handle it now. + hasErr: true, + }, + { + str: ":8269", // no scheme and IP + urls: []url.URL{{Scheme: "http", Host: "0.0.0.0:8269"}}, + }, + { + str: ":8269,http://127.0.0.1:18269", + urls: []url.URL{ + {Scheme: "http", Host: "0.0.0.0:8269"}, + {Scheme: "http", Host: "127.0.0.1:18269"}, + }, + }, + } + + for _, cs := range cases { + c.Logf("raw string %s", cs.str) + urls, err := parseURLs(cs.str) + if cs.hasErr { + c.Assert(terror.ErrMasterParseURLFail.Equal(err), check.IsTrue) + } else { + c.Assert(err, check.IsNil) + c.Assert(urls, check.DeepEquals, cs.urls) + } + } +} diff --git a/pkg/terror/error_list.go b/pkg/terror/error_list.go index ff63629db4..bbc29d3396 100644 --- a/pkg/terror/error_list.go +++ b/pkg/terror/error_list.go @@ -384,6 +384,7 @@ const ( codeMasterGetHostnameFail codeMasterGenEmbedEtcdConfigFail codeMasterStartEmbedEtcdFail + codeMasterParseURLFail ) // DM-worker error code @@ -824,6 +825,7 @@ var ( ErrMasterGetHostnameFail = New(codeMasterGetHostnameFail, ClassDMMaster, ScopeInternal, LevelHigh, "get hostname fail") ErrMasterGenEmbedEtcdConfigFail = New(codeMasterGenEmbedEtcdConfigFail, ClassDMMaster, ScopeInternal, LevelHigh, "generate config item %s for embed etcd fail") ErrMasterStartEmbedEtcdFail = New(codeMasterStartEmbedEtcdFail, ClassDMMaster, ScopeInternal, LevelHigh, "start embed etcd fail") + ErrMasterParseURLFail = New(codeMasterParseURLFail, ClassDMMaster, ScopeInternal, LevelHigh, "parse URL %s fail") // DM-worker error ErrWorkerParseFlagSet = New(codeWorkerParseFlagSet, ClassDMWorker, ScopeInternal, LevelMedium, "parse dm-worker config flag set") From f78762cdc4b1d36da37c8e88150cbe784363b183 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Mon, 4 Nov 2019 21:06:16 +0800 Subject: [PATCH 04/27] *: refine config; add more test cases --- dm/master/config.go | 26 +++++++++++----- dm/master/config_test.go | 64 +++++++++++++++++++++++++++++++++++----- dm/master/dm-master.toml | 15 ++++++++++ dm/master/server.go | 43 +-------------------------- dm/master/server_test.go | 17 +++-------- 5 files changed, 95 insertions(+), 70 deletions(-) diff --git a/dm/master/config.go b/dm/master/config.go index be21f22245..28b68e7b0b 100644 --- a/dm/master/config.go +++ b/dm/master/config.go @@ -19,6 +19,7 @@ import ( "flag" "fmt" "io/ioutil" + "net" "net/url" "os" "strings" @@ -34,9 +35,10 @@ import ( ) const ( - defaultRPCTimeout = "30s" - defaultNamePrefix = "dm-master" - defaultPeerUrls = "http://127.0.0.1:8269" + defaultRPCTimeout = "30s" + defaultNamePrefix = "dm-master" + defaultDataDirPrefix = "default" + defaultPeerUrls = "http://127.0.0.1:8269" ) // SampleConfigFile is sample config file of dm-master @@ -59,10 +61,10 @@ func NewConfig() *Config { //fs.StringVar(&cfg.LogRotate, "log-rotate", "day", "log file rotate type, hour/day") fs.StringVar(&cfg.Name, "name", "", "human-readable name for this DM-master member") - fs.StringVar(&cfg.DataDir, "data-dir", "", "path to the data directory (default 'default.${name}')") + fs.StringVar(&cfg.DataDir, "data-dir", "", `path to the data directory (default "default.${name}")`) fs.StringVar(&cfg.InitialCluster, "initial-cluster", "", fmt.Sprintf("initial cluster configuration for bootstrapping, e,g. dm-master=%s", defaultPeerUrls)) fs.StringVar(&cfg.PeerUrls, "peer-urls", defaultPeerUrls, "URLs for peer traffic") - fs.StringVar(&cfg.AdvertisePeerUrls, "advertise-peer-urls", "", "advertise URLs for peer traffic (default '${peer-urls}')") + fs.StringVar(&cfg.AdvertisePeerUrls, "advertise-peer-urls", "", `advertise URLs for peer traffic (default "${peer-urls}")`) return cfg } @@ -191,6 +193,12 @@ func (c *Config) configFromFile(path string) error { // adjust adjusts configs func (c *Config) adjust() error { + // MasterAddr's format may be "host:port" or ":port" + _, _, err := net.SplitHostPort(c.MasterAddr) + if err != nil { + return terror.ErrMasterHostPortNotValid.Delegate(err, c.MasterAddr) + } + c.DeployMap = make(map[string]string) for _, item := range c.Deploy { if err := item.Verify(); err != nil { @@ -233,7 +241,7 @@ func (c *Config) adjust() error { } if c.DataDir == "" { - c.DataDir = fmt.Sprintf("default.%s", c.Name) + c.DataDir = fmt.Sprintf("%s.%s", defaultDataDirPrefix, c.Name) } if c.PeerUrls == "" { @@ -252,7 +260,8 @@ func (c *Config) adjust() error { c.InitialCluster = strings.Join(items, ",") } - return nil + _, err = c.genEmbedEtcdConfig() // verify embed etcd config + return err } // UpdateConfigFile write config to local file @@ -320,7 +329,8 @@ func parseURLs(s string) ([]url.URL, error) { urls := make([]url.URL, 0, len(items)) for _, item := range items { u, err := url.Parse(item) - if err != nil && strings.Contains(err.Error(), "missing protocol scheme") { + if err != nil && (strings.Contains(err.Error(), "missing protocol scheme") || + strings.Contains(err.Error(), "first path segment in URL cannot contain colon")) { u, err = url.Parse("http://" + item) } if err != nil { diff --git a/dm/master/config_test.go b/dm/master/config_test.go index beb6f86590..d5ec9b7b21 100644 --- a/dm/master/config_test.go +++ b/dm/master/config_test.go @@ -19,6 +19,7 @@ import ( "fmt" "io/ioutil" "net/url" + "os" "path" "strings" @@ -74,10 +75,15 @@ func (t *testConfigSuite) TestPrintSampleConfig(c *check.C) { func (t *testConfigSuite) TestConfig(c *check.C) { var ( - err error - cfg = &Config{} - masterAddr = ":8261" - deployMap = map[string]string{ + err error + cfg = &Config{} + masterAddr = ":8261" + name = "dm-master" + dataDir = "default.dm-master" + peerURLs = "http://127.0.0.1:8269" + advertisePeerURLs = "http://127.0.0.1:8269" + initialCluster = "dm-master=http://127.0.0.1:8269" + deployMap = map[string]string{ "mysql-replica-01": "172.16.10.72:8262", "mysql-replica-02": "172.16.10.73:8262", } @@ -122,6 +128,11 @@ func (t *testConfigSuite) TestConfig(c *check.C) { c.Assert(err, check.ErrorMatches, tc.errorReg) } else { c.Assert(cfg.MasterAddr, check.Equals, masterAddr) + c.Assert(cfg.Name, check.Equals, name) + c.Assert(cfg.DataDir, check.Equals, dataDir) + c.Assert(cfg.PeerUrls, check.Equals, peerURLs) + c.Assert(cfg.AdvertisePeerUrls, check.Equals, advertisePeerURLs) + c.Assert(cfg.InitialCluster, check.Equals, initialCluster) c.Assert(cfg.DeployMap, check.DeepEquals, deployMap) c.Assert(cfg.String(), check.Matches, fmt.Sprintf("{.*master-addr\":\"%s\".*}", masterAddr)) } @@ -201,7 +212,34 @@ dm-worker = "172.16.10.72:8262"`) err = cfg.configFromFile(filepath2) c.Assert(err, check.NotNil) c.Assert(err, check.ErrorMatches, "*master config contained unknown configuration options: aaa*") + + // invalid `master-addr` + filepath3 := path.Join(c.MkDir(), "test_invalid_config.toml") + configContent3 := []byte(`master-addr = ""`) + err = ioutil.WriteFile(filepath3, configContent3, 0644) + err = cfg.configFromFile(filepath3) + c.Assert(err, check.IsNil) + c.Assert(terror.ErrMasterHostPortNotValid.Equal(cfg.adjust()), check.IsTrue) +} + +func (t *testConfigSuite) TestGenEmbedEtcdConfig(c *check.C) { + hostname, err := os.Hostname() + c.Assert(err, check.IsNil) + + cfg1 := NewConfig() + cfg1.MasterAddr = ":8261" + c.Assert(cfg1.adjust(), check.IsNil) + etcdCfg, err := cfg1.genEmbedEtcdConfig() + c.Assert(err, check.IsNil) + c.Assert(etcdCfg.Name, check.Equals, fmt.Sprintf("dm-master-%s", hostname)) + c.Assert(etcdCfg.Dir, check.Equals, fmt.Sprintf("default.%s", etcdCfg.Name)) + c.Assert(etcdCfg.LCUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "0.0.0.0:8261"}}) + c.Assert(etcdCfg.ACUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "0.0.0.0:8261"}}) + c.Assert(etcdCfg.LPUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "127.0.0.1:8269"}}) + c.Assert(etcdCfg.APUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "127.0.0.1:8269"}}) + c.Assert(etcdCfg.InitialCluster, check.DeepEquals, fmt.Sprintf("dm-master-%s=http://127.0.0.1:8269", hostname)) } + func (t *testConfigSuite) TestParseURLs(c *check.C) { cases := []struct { str string @@ -221,13 +259,25 @@ func (t *testConfigSuite) TestParseURLs(c *check.C) { }, }, { - str: "127.0.0.1:8269", // no scheme, but url.Parse can't handle it now. - hasErr: true, + str: "127.0.0.1:8269", // no scheme + urls: []url.URL{{Scheme: "http", Host: "127.0.0.1:8269"}}, }, { - str: ":8269", // no scheme and IP + str: "http://:8269", // no IP urls: []url.URL{{Scheme: "http", Host: "0.0.0.0:8269"}}, }, + { + str: ":8269", // no scheme, no IP + urls: []url.URL{{Scheme: "http", Host: "0.0.0.0:8269"}}, + }, + { + str: "http://", // no IP, no port + urls: []url.URL{{Scheme: "http", Host: ""}}, + }, + { + str: "http://\n127.0.0.1:8269", // invalid char in URL + hasErr: true, + }, { str: ":8269,http://127.0.0.1:18269", urls: []url.URL{ diff --git a/dm/master/dm-master.toml b/dm/master/dm-master.toml index e63ea190b6..50f8308e9d 100644 --- a/dm/master/dm-master.toml +++ b/dm/master/dm-master.toml @@ -21,6 +21,21 @@ log-file = "dm-master.log" #dm-master listen address master-addr = ":8261" +# human-readable name for this DM-master member +name = "dm-master" + +# path to the data directory (default 'default.${name}') +data-dir = "default.dm-master" + +# URLs for peer traffic +peer-urls = "http://127.0.0.1:8269" + +# advertise URLs for peer traffic (default '${peer-urls}') +advertise-peer-urls = "http://127.0.0.1:8269" + +# initial cluster configuration for bootstrapping, e,g. dm-master=http://127.0.0.1:8269 +initial-cluster = "dm-master=http://127.0.0.1:8269" + # replication group <-> dm-Worker deployment, we'll refine it when new deployment function is available [[deploy]] source-id = "mysql-replica-01" diff --git a/dm/master/server.go b/dm/master/server.go index a9a8d326af..5f6f315632 100644 --- a/dm/master/server.go +++ b/dm/master/server.go @@ -17,14 +17,12 @@ import ( "context" "fmt" "io" - "net" "net/http" "sort" "strings" "sync" "time" - "github.com/grpc-ecosystem/grpc-gateway/runtime" "github.com/pingcap/errors" "github.com/siddontang/go/sync2" "go.etcd.io/etcd/embed" @@ -96,13 +94,7 @@ func NewServer(cfg *Config) *Server { } // Start starts to serving -func (s *Server) Start(ctx context.Context) error { - // TODO: check config in config.go? - _, _, err := s.splitHostPort() - if err != nil { - return err - } - +func (s *Server) Start(ctx context.Context) (err error) { // create clients to DM-workers for _, workerAddr := range s.cfg.DeployMap { s.workerClients[workerAddr], err = workerrpc.NewGRPCClient(workerAddr) @@ -1974,36 +1966,3 @@ func (s *Server) workerArgsExtractor(args ...interface{}) (workerrpc.Client, str return cli, worker, nil } - -// HandleHTTPApis handles http apis and translate to grpc request -func (s *Server) HandleHTTPApis(ctx context.Context, mux *http.ServeMux) error { - // MasterAddr's format may be "host:port" or "":port" - _, port, err := s.splitHostPort() - if err != nil { - return err - } - - opts := []grpc.DialOption{grpc.WithInsecure()} - conn, err := grpc.DialContext(ctx, "127.0.0.1:"+port, opts...) - if err != nil { - return terror.ErrMasterHandleHTTPApis.Delegate(err) - } - - gwmux := runtime.NewServeMux() - err = pb.RegisterMasterHandler(ctx, gwmux, conn) - if err != nil { - return terror.ErrMasterHandleHTTPApis.Delegate(err) - } - mux.Handle("/apis/", gwmux) - - return nil -} - -func (s *Server) splitHostPort() (host, port string, err error) { - // MasterAddr's format may be "host:port" or ":port" - host, port, err = net.SplitHostPort(s.cfg.MasterAddr) - if err != nil { - err = terror.ErrMasterHostPortNotValid.Delegate(err, s.cfg.MasterAddr) - } - return -} diff --git a/dm/master/server_test.go b/dm/master/server_test.go index 9ca95c3f6d..52bdd4d7ac 100644 --- a/dm/master/server_test.go +++ b/dm/master/server_test.go @@ -1491,28 +1491,19 @@ func (t *testMaster) TestServer(c *check.C) { s := NewServer(cfg) - masterAddr := cfg.MasterAddr - s.cfg.MasterAddr = "" - ctx1, cancel1 := context.WithCancel(context.Background()) - err := s.Start(ctx1) - c.Assert(terror.ErrMasterHostPortNotValid.Equal(err), check.IsTrue) - cancel1() - s.Close() - s.cfg.MasterAddr = masterAddr - - ctx2, cancel2 := context.WithCancel(context.Background()) - err1 := s.Start(ctx2) + ctx, cancel := context.WithCancel(context.Background()) + err1 := s.Start(ctx) c.Assert(err1, check.IsNil) t.testHTTPInterface(c, "status") dupServer := NewServer(cfg) - err = dupServer.Start(ctx2) + err := dupServer.Start(ctx) c.Assert(terror.ErrMasterStartEmbedEtcdFail.Equal(err), check.IsTrue) c.Assert(err.Error(), check.Matches, ".*bind: address already in use") // close - cancel2() + cancel() s.Close() c.Assert(utils.WaitSomething(30, 10*time.Millisecond, func() bool { From 3ad9ff2941139b4ed884a5f86ea348c00389502e Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 5 Nov 2019 10:19:35 +0800 Subject: [PATCH 05/27] tests: fix CI --- tests/dmctl_basic/conf/dm-master.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/dmctl_basic/conf/dm-master.toml b/tests/dmctl_basic/conf/dm-master.toml index 334e0de993..b08f4d77d5 100644 --- a/tests/dmctl_basic/conf/dm-master.toml +++ b/tests/dmctl_basic/conf/dm-master.toml @@ -1,5 +1,7 @@ # Master Configuration. +master-addr = ":8261" + [[deploy]] source-id = "mysql-replica-01" dm-worker = "127.0.0.1:8262" From 675a1c2f1d37b54f1cb8ebbbcb1ff7f79a5f7f5e Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 5 Nov 2019 11:46:40 +0800 Subject: [PATCH 06/27] master: add more test cases for embed etcd config --- dm/master/config_test.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/dm/master/config_test.go b/dm/master/config_test.go index d5ec9b7b21..4b74c7f5be 100644 --- a/dm/master/config_test.go +++ b/dm/master/config_test.go @@ -238,6 +238,37 @@ func (t *testConfigSuite) TestGenEmbedEtcdConfig(c *check.C) { c.Assert(etcdCfg.LPUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "127.0.0.1:8269"}}) c.Assert(etcdCfg.APUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "127.0.0.1:8269"}}) c.Assert(etcdCfg.InitialCluster, check.DeepEquals, fmt.Sprintf("dm-master-%s=http://127.0.0.1:8269", hostname)) + + cfg2 := *cfg1 + cfg2.MasterAddr = "127.0.0.1\n:8261" + _, err = cfg2.genEmbedEtcdConfig() + c.Assert(terror.ErrMasterGenEmbedEtcdConfigFail.Equal(err), check.IsTrue) + c.Assert(err, check.ErrorMatches, "(?m).*--listen-client-urls/--advertise-client-urls.*") + cfg2.MasterAddr = "172.100.8.8:8261" + etcdCfg, err = cfg2.genEmbedEtcdConfig() + c.Assert(err, check.IsNil) + c.Assert(etcdCfg.LCUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "172.100.8.8:8261"}}) + c.Assert(etcdCfg.ACUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "172.100.8.8:8261"}}) + + cfg3 := *cfg1 + cfg3.PeerUrls = "127.0.0.1:\n8269" + _, err = cfg3.genEmbedEtcdConfig() + c.Assert(terror.ErrMasterGenEmbedEtcdConfigFail.Equal(err), check.IsTrue) + c.Assert(err, check.ErrorMatches, "(?m).*--listen-peer-urls.*") + cfg3.PeerUrls = "http://172.100.8.8:8269" + etcdCfg, err = cfg3.genEmbedEtcdConfig() + c.Assert(err, check.IsNil) + c.Assert(etcdCfg.LPUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "172.100.8.8:8269"}}) + + cfg4 := *cfg1 + cfg4.AdvertisePeerUrls = "127.0.0.1:\n8269" + _, err = cfg4.genEmbedEtcdConfig() + c.Assert(terror.ErrMasterGenEmbedEtcdConfigFail.Equal(err), check.IsTrue) + c.Assert(err, check.ErrorMatches, "(?m).*--initial-advertise-peer-urls.*") + cfg4.AdvertisePeerUrls = "http://172.100.8.8:8269" + etcdCfg, err = cfg4.genEmbedEtcdConfig() + c.Assert(err, check.IsNil) + c.Assert(etcdCfg.APUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "172.100.8.8:8269"}}) } func (t *testConfigSuite) TestParseURLs(c *check.C) { From 625a891a211fb5054088b28c2620fbf95a40737c Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 5 Nov 2019 14:58:32 +0800 Subject: [PATCH 07/27] master: refine API tests --- dm/master/http_handler_test.go | 18 ------------------ dm/master/server_test.go | 14 ++++++++++---- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/dm/master/http_handler_test.go b/dm/master/http_handler_test.go index 904f7261b6..c66843fe0c 100644 --- a/dm/master/http_handler_test.go +++ b/dm/master/http_handler_test.go @@ -18,10 +18,8 @@ import ( "fmt" "io/ioutil" "net/http" - "time" "github.com/pingcap/check" - "github.com/pingcap/errors" ) var _ = check.Suite(&testHTTPServer{}) @@ -50,22 +48,6 @@ func (t *testHTTPServer) stopServer(c *check.C) { } } -const retryTime = 100 - -func (t *testHTTPServer) waitUntilServerOnline() error { - statusURL := fmt.Sprintf("http://127.0.0.1%s/status", t.cfg.MasterAddr) - for i := 0; i < retryTime; i++ { - resp, err := http.Get(statusURL) - if err == nil && resp.StatusCode == http.StatusOK { - ioutil.ReadAll(resp.Body) - resp.Body.Close() - return nil - } - time.Sleep(time.Millisecond * 10) - } - return errors.Errorf("failed to connect http status for %d retries in every 10ms", retryTime) -} - func (t *testHTTPServer) TestStatus(c *check.C) { ctx, cancel := context.WithCancel(context.Background()) t.startServer(ctx, c) diff --git a/dm/master/server_test.go b/dm/master/server_test.go index 52bdd4d7ac..7e4cedcd69 100644 --- a/dm/master/server_test.go +++ b/dm/master/server_test.go @@ -14,6 +14,7 @@ package master import ( + "bytes" "context" "io/ioutil" "net/http" @@ -1488,6 +1489,7 @@ func (t *testMaster) TestServer(c *check.C) { cfg := NewConfig() c.Assert(cfg.Parse([]string{"-config=./dm-master.toml"}), check.IsNil) cfg.DataDir = c.MkDir() + cfg.MasterAddr = "127.0.0.1:18261" // use a different port s := NewServer(cfg) @@ -1495,7 +1497,9 @@ func (t *testMaster) TestServer(c *check.C) { err1 := s.Start(ctx) c.Assert(err1, check.IsNil) - t.testHTTPInterface(c, "status") + t.testHTTPInterface(c, fmt.Sprintf("http://%s/status", cfg.MasterAddr), []byte(utils.GetRawInfo())) + t.testHTTPInterface(c, fmt.Sprintf("http://%s/debug/pprof/", cfg.MasterAddr), []byte("Types of profiles available")) + t.testHTTPInterface(c, fmt.Sprintf("http://%s/apis/v1alpha1/status/test-task", cfg.MasterAddr), []byte("task test-task has no workers or not exist")) dupServer := NewServer(cfg) err := dupServer.Start(ctx) @@ -1511,11 +1515,13 @@ func (t *testMaster) TestServer(c *check.C) { }), check.IsTrue) } -func (t *testMaster) testHTTPInterface(c *check.C, uri string) { - resp, err := http.Get("http://127.0.0.1:8261/" + uri) +func (t *testMaster) testHTTPInterface(c *check.C, url string, contain []byte) { + resp, err := http.Get(url) c.Assert(err, check.IsNil) defer resp.Body.Close() c.Assert(resp.StatusCode, check.Equals, 200) - _, err = ioutil.ReadAll(resp.Body) + + body, err := ioutil.ReadAll(resp.Body) c.Assert(err, check.IsNil) + c.Assert(bytes.Contains(body, contain), check.IsTrue) } From cfb009609d55082fa7a22ae05daf0cd9eb96f635 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 5 Nov 2019 15:29:25 +0800 Subject: [PATCH 08/27] master: refine code --- dm/master/config.go | 5 ++- dm/master/http_handler.go | 4 +- dm/master/http_handler_test.go | 67 ---------------------------------- 3 files changed, 4 insertions(+), 72 deletions(-) delete mode 100644 dm/master/http_handler_test.go diff --git a/dm/master/config.go b/dm/master/config.go index 28b68e7b0b..5e8c6c1758 100644 --- a/dm/master/config.go +++ b/dm/master/config.go @@ -201,7 +201,7 @@ func (c *Config) adjust() error { c.DeployMap = make(map[string]string) for _, item := range c.Deploy { - if err := item.Verify(); err != nil { + if err = item.Verify(); err != nil { return err } @@ -233,7 +233,8 @@ func (c *Config) adjust() error { } if c.Name == "" { - hostname, err := os.Hostname() + var hostname string + hostname, err = os.Hostname() if err != nil { return terror.ErrMasterGetHostnameFail.Delegate(err) } diff --git a/dm/master/http_handler.go b/dm/master/http_handler.go index 234d5b3b30..ffb1ddddfc 100644 --- a/dm/master/http_handler.go +++ b/dm/master/http_handler.go @@ -21,7 +21,6 @@ import ( "github.com/grpc-ecosystem/grpc-gateway/runtime" "google.golang.org/grpc" - "github.com/pingcap/dm/dm/common" "github.com/pingcap/dm/dm/pb" "github.com/pingcap/dm/pkg/log" "github.com/pingcap/dm/pkg/terror" @@ -35,8 +34,7 @@ type statusHandler struct { func (h *statusHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { w.Header().Set("Content-Type", "text/plain") text := utils.GetRawInfo() - _, err := w.Write([]byte(text)) - if err != nil && !common.IsErrNetClosing(err) { + if _, err := w.Write([]byte(text)); err != nil { log.L().Error("write status response", log.ShortError(err)) } } diff --git a/dm/master/http_handler_test.go b/dm/master/http_handler_test.go deleted file mode 100644 index c66843fe0c..0000000000 --- a/dm/master/http_handler_test.go +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2019 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package master - -import ( - "context" - "fmt" - "io/ioutil" - "net/http" - - "github.com/pingcap/check" -) - -var _ = check.Suite(&testHTTPServer{}) - -type testHTTPServer struct { - server *Server - cfg *Config -} - -func (t *testHTTPServer) startServer(ctx context.Context, c *check.C) { - t.cfg = NewConfig() - t.cfg.MasterAddr = ":8261" - t.cfg.RPCRateLimit = DefaultRate - t.cfg.RPCRateBurst = DefaultBurst - t.cfg.DataDir = c.MkDir() - c.Assert(t.cfg.adjust(), check.IsNil) - - t.server = NewServer(t.cfg) - err := t.server.Start(ctx) - c.Assert(err, check.IsNil) -} - -func (t *testHTTPServer) stopServer(c *check.C) { - if t.server != nil { - t.server.Close() - } -} - -func (t *testHTTPServer) TestStatus(c *check.C) { - ctx, cancel := context.WithCancel(context.Background()) - t.startServer(ctx, c) - defer func() { - cancel() - t.stopServer(c) - }() - - statusURL := fmt.Sprintf("http://127.0.0.1%s/status", t.cfg.MasterAddr) - resp, err := http.Get(statusURL) - c.Assert(err, check.IsNil) - c.Assert(resp.StatusCode, check.Equals, http.StatusOK) - buf, err2 := ioutil.ReadAll(resp.Body) - c.Assert(err2, check.IsNil) - status := string(buf) - c.Assert(status, check.Matches, "Release Version:.*\nGit Commit Hash:.*\nGit Branch:.*\nUTC Build Time:.*\nGo Version:.*\n") -} From 6dfbe573416721dfcc1f817d8808029657eff309 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Wed, 6 Nov 2019 12:00:25 +0800 Subject: [PATCH 09/27] master: update sample config items oder --- dm/master/dm-master.toml | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/dm/master/dm-master.toml b/dm/master/dm-master.toml index 50f8308e9d..7209aee900 100644 --- a/dm/master/dm-master.toml +++ b/dm/master/dm-master.toml @@ -1,24 +1,10 @@ # Master Configuration. -# rpc configuration -# -# rpc timeout is a positive number plus time unit. we use golang standard time -# units including: "ns", "us", "ms", "s", "m", "h". You should provide a proper -# rpc timeout according to your use scenario. -rpc-timeout = "30s" -# rpc limiter controls how frequently events are allowed to happen. -# It implements a "token bucket" of size `rpc-rate-limit`, initially full and -# refilled at rate `rpc-rate-limit` tokens per second. Note `rpc-rate-limit` -# is float64 type, so remember to add a decimal point and one trailing 0 if its -# literal value happens to be an integer. -rpc-rate-limit = 10.0 -rpc-rate-burst = 40 - -#log configuration +# log configuration log-level = "info" log-file = "dm-master.log" -#dm-master listen address +# dm-master listen address master-addr = ":8261" # human-readable name for this DM-master member @@ -36,6 +22,20 @@ advertise-peer-urls = "http://127.0.0.1:8269" # initial cluster configuration for bootstrapping, e,g. dm-master=http://127.0.0.1:8269 initial-cluster = "dm-master=http://127.0.0.1:8269" +# rpc configuration +# +# rpc timeout is a positive number plus time unit. we use golang standard time +# units including: "ns", "us", "ms", "s", "m", "h". You should provide a proper +# rpc timeout according to your use scenario. +rpc-timeout = "30s" +# rpc limiter controls how frequently events are allowed to happen. +# It implements a "token bucket" of size `rpc-rate-limit`, initially full and +# refilled at rate `rpc-rate-limit` tokens per second. Note `rpc-rate-limit` +# is float64 type, so remember to add a decimal point and one trailing 0 if its +# literal value happens to be an integer. +rpc-rate-limit = 10.0 +rpc-rate-burst = 40 + # replication group <-> dm-Worker deployment, we'll refine it when new deployment function is available [[deploy]] source-id = "mysql-replica-01" From 3d664b444892700d899725ee0ffecfcfe4124ae6 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Wed, 6 Nov 2019 19:30:55 +0800 Subject: [PATCH 10/27] master: refine code --- dm/master/etcd.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dm/master/etcd.go b/dm/master/etcd.go index c9b37b7d02..a743664a0b 100644 --- a/dm/master/etcd.go +++ b/dm/master/etcd.go @@ -17,7 +17,6 @@ import ( "net/http" "time" - "github.com/pingcap/errors" "go.etcd.io/etcd/embed" "google.golang.org/grpc" @@ -52,7 +51,7 @@ func startEtcd(masterCfg *Config, case <-time.After(timeout): e.Server.Stop() e.Close() - return nil, terror.ErrMasterStartEmbedEtcdFail.Delegate(errors.Errorf("start embed etcd timeout %v", timeout)) + return nil, terror.ErrMasterStartEmbedEtcdFail.Generatef("start embed etcd timeout %v", timeout) } return e, nil } From 139ef1ae29147fc6798696fcec1a4680c57cfd1e Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Thu, 7 Nov 2019 13:44:51 +0800 Subject: [PATCH 11/27] *: prepare join embed etcd cluster --- _utils/terror_gen/errors_release.txt | 1 + dm/master/config.go | 26 +++-- dm/master/etcd.go | 120 +++++++++++++++++++++ go.mod | 2 +- pkg/etcdutil/etcdutil.go | 46 ++++++++ pkg/etcdutil/etcdutil_test.go | 153 +++++++++++++++++++++++++++ pkg/terror/error_list.go | 2 + 7 files changed, 340 insertions(+), 10 deletions(-) create mode 100644 pkg/etcdutil/etcdutil.go create mode 100644 pkg/etcdutil/etcdutil_test.go diff --git a/_utils/terror_gen/errors_release.txt b/_utils/terror_gen/errors_release.txt index e779c24322..ed32705e41 100644 --- a/_utils/terror_gen/errors_release.txt +++ b/_utils/terror_gen/errors_release.txt @@ -316,6 +316,7 @@ ErrMasterGetHostnameFail,[code=38036:class=dm-master:scope=internal:level=high], ErrMasterGenEmbedEtcdConfigFail,[code=38037:class=dm-master:scope=internal:level=high],"generate config item %s for embed etcd fail" ErrMasterStartEmbedEtcdFail,[code=38038:class=dm-master:scope=internal:level=high],"start embed etcd fail" ErrMasterParseURLFail,[code=38039:class=dm-master:scope=internal:level=high],"parse URL %s fail" +ErrMasterJoinEmbedEtcdFail,[code=38040:class=dm-master:scope=internal:level=high],"join embed etcd fail" ErrWorkerParseFlagSet,[code=40001:class=dm-worker:scope=internal:level=medium],"parse dm-worker config flag set" ErrWorkerInvalidFlag,[code=40002:class=dm-worker:scope=internal:level=medium],"'%s' is an invalid flag" ErrWorkerDecodeConfigFromFile,[code=40003:class=dm-worker:scope=internal:level=medium],"toml decode file" diff --git a/dm/master/config.go b/dm/master/config.go index 5e8c6c1758..f1b87d0b8d 100644 --- a/dm/master/config.go +++ b/dm/master/config.go @@ -35,10 +35,11 @@ import ( ) const ( - defaultRPCTimeout = "30s" - defaultNamePrefix = "dm-master" - defaultDataDirPrefix = "default" - defaultPeerUrls = "http://127.0.0.1:8269" + defaultRPCTimeout = "30s" + defaultNamePrefix = "dm-master" + defaultDataDirPrefix = "default" + defaultPeerUrls = "http://127.0.0.1:8269" + defaultInitialClusterState = embed.ClusterStateFlagNew ) // SampleConfigFile is sample config file of dm-master @@ -65,6 +66,7 @@ func NewConfig() *Config { fs.StringVar(&cfg.InitialCluster, "initial-cluster", "", fmt.Sprintf("initial cluster configuration for bootstrapping, e,g. dm-master=%s", defaultPeerUrls)) fs.StringVar(&cfg.PeerUrls, "peer-urls", defaultPeerUrls, "URLs for peer traffic") fs.StringVar(&cfg.AdvertisePeerUrls, "advertise-peer-urls", "", `advertise URLs for peer traffic (default "${peer-urls}")`) + fs.StringVar(&cfg.Join, "join", "", `join to an existing cluster (usage: cluster's "${advertise-client-urls}"`) return cfg } @@ -108,11 +110,13 @@ type Config struct { // etcd relative config items // NOTE: we use `MasterAddr` to generate `ClientUrls` and `AdvertiseClientUrls` // NOTE: more items will be add when adding leader election - Name string `toml:"name" json:"name"` - DataDir string `toml:"data-dir" json:"data-dir"` - PeerUrls string `toml:"peer-urls" json:"peer-urls"` - AdvertisePeerUrls string `toml:"advertise-peer-urls" json:"advertise-peer-urls"` - InitialCluster string `toml:"initial-cluster" json:"initial-cluster"` + Name string `toml:"name" json:"name"` + DataDir string `toml:"data-dir" json:"data-dir"` + PeerUrls string `toml:"peer-urls" json:"peer-urls"` + AdvertisePeerUrls string `toml:"advertise-peer-urls" json:"advertise-peer-urls"` + InitialCluster string `toml:"initial-cluster" json:"initial-cluster"` + InitialClusterState string `toml:"initial-cluster-state" json:"initial-cluster-state"` + Join string `toml:"join" json:"join"` printVersion bool printSampleConfig bool @@ -261,6 +265,10 @@ func (c *Config) adjust() error { c.InitialCluster = strings.Join(items, ",") } + if c.InitialClusterState == "" { + c.InitialClusterState = defaultInitialClusterState + } + _, err = c.genEmbedEtcdConfig() // verify embed etcd config return err } diff --git a/dm/master/etcd.go b/dm/master/etcd.go index a743664a0b..4983aa9dfe 100644 --- a/dm/master/etcd.go +++ b/dm/master/etcd.go @@ -14,15 +14,27 @@ package master import ( + "fmt" + "io/ioutil" "net/http" + "os" + "path/filepath" + "strings" "time" + "go.etcd.io/etcd/clientv3" "go.etcd.io/etcd/embed" "google.golang.org/grpc" + "github.com/pingcap/dm/pkg/etcdutil" "github.com/pingcap/dm/pkg/terror" ) +var ( + // privateDirMode grants owner to make/remove files inside the directory. + privateDirMode os.FileMode = 0700 +) + // startEtcd starts an embedded etcd server. func startEtcd(masterCfg *Config, gRPCSvr func(*grpc.Server), @@ -55,3 +67,111 @@ func startEtcd(masterCfg *Config, } return e, nil } + +// prepareJoinEtcd prepares config needed to join an existing cluster. +// learn from https://github.com/pingcap/pd/blob/37efcb05f397f26c70cda8dd44acaa3061c92159/server/join/join.go#L44. +func prepareJoinEtcd(cfg *Config) error { + // no need to join + if cfg.Join == "" { + return nil + } + + // try to join self, invalid + if cfg.Join == cfg.AdvertisePeerUrls { + return terror.ErrMasterJoinEmbedEtcdFail.Generatef("join self %s is forbidden", cfg.Join) + } + + // join with persist data + joinFP := filepath.Join(cfg.DataDir, "join") + if _, err := os.Stat(joinFP); !os.IsNotExist(err) { + s, err := ioutil.ReadFile(joinFP) + if err != nil { + return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err) + } + cfg.InitialCluster = strings.TrimSpace(string(s)) + cfg.InitialClusterState = embed.ClusterStateFlagExisting + return nil + } + + // restart from previous data, no `InitialCluster` need to set + if isDataExist(filepath.Join(cfg.DataDir, "member")) { + cfg.InitialCluster = "" + cfg.InitialClusterState = embed.ClusterStateFlagExisting + return nil + } + + // if without previous data, we need a client to contact with the existing cluster. + client, err := clientv3.New(clientv3.Config{ + Endpoints: strings.Split(cfg.Join, ","), + DialTimeout: etcdutil.DefaultDialTimeout, + }) + if err != nil { + return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err) + } + defer client.Close() + + // `member list` + listResp, err := etcdutil.ListMembers(client) + if err != nil { + return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err) + } + + // check members + for _, m := range listResp.Members { + if m.Name == "" { + return terror.ErrMasterJoinEmbedEtcdFail.New("there is a member that has not joined successfully") + } + if m.Name == cfg.Name { + // a failed DM-master re-joins the previous cluster. + return terror.ErrMasterJoinEmbedEtcdFail.Generatef("missing data or joining a duplicated dm-master %s", m.Name) + } + } + + // `member add`, a new/deleted DM-master joins to an existing cluster. + addResp, err := etcdutil.AddMember(client, strings.Split(cfg.AdvertisePeerUrls, ",")) + if err != nil { + return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err) + } + + // generate `--initial-cluster` + ms := make([]string, 0, len(addResp.Members)) + for _, m := range addResp.Members { + name := m.Name + if m.ID == addResp.Member.ID { + name = cfg.Name + } + if name == "" { + return terror.ErrMasterJoinEmbedEtcdFail.New("there is a member that has not joined successfully") + } + for _, url := range m.PeerURLs { + ms = append(ms, fmt.Sprintf("%s=%s", name, url)) + } + } + cfg.InitialCluster = strings.Join(ms, ",") + cfg.InitialClusterState = embed.ClusterStateFlagExisting + + // save `--initial-cluster` in persist data + if err = os.MkdirAll(cfg.DataDir, privateDirMode); !os.IsExist(err) { + return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err) + } + if err = ioutil.WriteFile(joinFP, []byte(cfg.InitialCluster), privateDirMode); err != nil { + return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err) + } + + return nil +} + +// isDataExist returns whether the directory is empty (with data) +func isDataExist(d string) bool { + dir, err := os.Open(d) + if err != nil { + return false + } + defer dir.Close() + + names, err := dir.Readdirnames(1) // read only one is enough + if err != nil { + return false + } + return len(names) != 0 +} diff --git a/go.mod b/go.mod index d734b235fa..ad0a3b8156 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ require ( github.com/DATA-DOG/go-sqlmock v1.3.3 github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d // indirect github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e - github.com/coreos/etcd v3.3.15+incompatible // indirect + github.com/coreos/etcd v3.3.15+incompatible github.com/coreos/go-semver v0.3.0 // indirect github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f // indirect github.com/cznic/sortutil v0.0.0-20181122101858-f5f958428db8 // indirect diff --git a/pkg/etcdutil/etcdutil.go b/pkg/etcdutil/etcdutil.go new file mode 100644 index 0000000000..f58a8a0801 --- /dev/null +++ b/pkg/etcdutil/etcdutil.go @@ -0,0 +1,46 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +// learn from https://github.com/pingcap/pd/blob/v3.0.5/pkg/etcdutil/etcdutil.go. + +package etcdutil + +import ( + "context" + "time" + + "go.etcd.io/etcd/clientv3" +) + +const ( + // DefaultDialTimeout is the maximum amount of time a dial will wait for a + // connection to setup. 30s is long enough for most of the network conditions. + DefaultDialTimeout = 30 * time.Second + + // DefaultRequestTimeout 10s is long enough for most of etcd clusters. + DefaultRequestTimeout = 10 * time.Second +) + +// ListMembers returns a list of internal etcd members. +func ListMembers(client *clientv3.Client) (*clientv3.MemberListResponse, error) { + ctx, cancel := context.WithTimeout(client.Ctx(), DefaultRequestTimeout) + defer cancel() + return client.MemberList(ctx) +} + +// AddMember adds an etcd member. +func AddMember(client *clientv3.Client, peerAddrs []string) (*clientv3.MemberAddResponse, error) { + ctx, cancel := context.WithTimeout(client.Ctx(), DefaultRequestTimeout) + defer cancel() + return client.MemberAdd(ctx, peerAddrs) +} diff --git a/pkg/etcdutil/etcdutil_test.go b/pkg/etcdutil/etcdutil_test.go new file mode 100644 index 0000000000..315a3ca379 --- /dev/null +++ b/pkg/etcdutil/etcdutil_test.go @@ -0,0 +1,153 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package etcdutil + +import ( + "fmt" + "net/url" + "strings" + "testing" + "time" + + "github.com/coreos/etcd/etcdserver/etcdserverpb" + . "github.com/pingcap/check" + "go.etcd.io/etcd/clientv3" + "go.etcd.io/etcd/embed" +) + +var _ = Suite(&testEtcdUtilSuite{}) + +type testEtcdUtilSuite struct { +} + +func TestSuite(t *testing.T) { + TestingT(t) +} + +func (t *testEtcdUtilSuite) newConfig(c *C, name string, basePort uint16, portCount int) (*embed.Config, uint16) { + cfg := embed.NewConfig() + cfg.Name = name + cfg.Dir = c.MkDir() + + cfg.LCUrls = []url.URL{} + for i := 0; i < portCount; i++ { + cu, err := url.Parse(fmt.Sprintf("http://127.0.0.1:%d", basePort)) + c.Assert(err, IsNil) + cfg.LCUrls = append(cfg.LCUrls, *cu) + basePort++ + } + cfg.ACUrls = cfg.LCUrls + + cfg.LPUrls = []url.URL{} + ic := make([]string, 0, portCount) + for i := 0; i < portCount; i++ { + pu, err := url.Parse(fmt.Sprintf("http://127.0.0.1:%d", basePort)) + c.Assert(err, IsNil) + cfg.LPUrls = append(cfg.LPUrls, *pu) + ic = append(ic, fmt.Sprintf("%s=%s", cfg.Name, pu)) + basePort++ + } + cfg.APUrls = cfg.LPUrls + + cfg.InitialCluster = strings.Join(ic, ",") + cfg.ClusterState = embed.ClusterStateFlagNew + return cfg, basePort +} + +func (t *testEtcdUtilSuite) urlsToStrings(URLs []url.URL) []string { + ret := make([]string, 0, len(URLs)) + for _, u := range URLs { + ret = append(ret, u.String()) + } + return ret +} + +func (t *testEtcdUtilSuite) startEtcd(c *C, cfg *embed.Config) *embed.Etcd { + e, err := embed.StartEtcd(cfg) + c.Assert(err, IsNil) + + timeout := time.Second + select { + case <-e.Server.ReadyNotify(): + case <-time.After(timeout): + c.Fatalf("start embed etcd timeout %v", timeout) + } + + return e +} + +func (t *testEtcdUtilSuite) createEtcdClient(c *C, cfg *embed.Config) *clientv3.Client { + cli, err := clientv3.New(clientv3.Config{Endpoints: t.urlsToStrings(cfg.LCUrls)}) + c.Assert(err, IsNil) + return cli +} + +func (t *testEtcdUtilSuite) checkMember(c *C, mid uint64, m *etcdserverpb.Member, cfg *embed.Config) { + if m.Name != "" { // no name exists after `member add` + c.Assert(m.Name, Equals, cfg.Name) + } + c.Assert(m.ID, Equals, mid) + c.Assert(m.ClientURLs, DeepEquals, t.urlsToStrings(cfg.ACUrls)) + c.Assert(m.PeerURLs, DeepEquals, t.urlsToStrings(cfg.APUrls)) +} + +func (t *testEtcdUtilSuite) TestMemberUtil(c *C) { + for i := 1; i <= 3; i++ { + t.testMemberUtilInternal(c, i) + } +} + +func (t *testEtcdUtilSuite) testMemberUtilInternal(c *C, portCount int) { + var basePort uint16 = 8361 + + // start a etcd + cfg1, basePort := t.newConfig(c, "etcd1", basePort, portCount) + etcd1 := t.startEtcd(c, cfg1) + defer etcd1.Close() + + // list member + cli := t.createEtcdClient(c, cfg1) + listResp1, err := ListMembers(cli) + c.Assert(err, IsNil) + c.Assert(listResp1.Members, HasLen, 1) + t.checkMember(c, uint64(etcd1.Server.ID()), listResp1.Members[0], cfg1) + + // add member + cfg2, basePort := t.newConfig(c, "etcd2", basePort, portCount) + cfg2.InitialCluster = cfg1.InitialCluster + "," + cfg2.InitialCluster + cfg2.ClusterState = embed.ClusterStateFlagExisting + addResp, err := AddMember(cli, t.urlsToStrings(cfg2.APUrls)) + c.Assert(err, IsNil) + c.Assert(addResp.Members, HasLen, 2) + + // start the added member + etcd2 := t.startEtcd(c, cfg2) + defer etcd2.Close() + c.Assert(addResp.Member.ID, Equals, uint64(etcd2.Server.ID())) + + // list member again + listResp2, err := ListMembers(cli) + c.Assert(err, IsNil) + c.Assert(listResp2.Members, HasLen, 2) + for _, m := range listResp2.Members { + switch m.ID { + case uint64(etcd1.Server.ID()): + t.checkMember(c, uint64(etcd1.Server.ID()), m, cfg1) + case uint64(etcd2.Server.ID()): + t.checkMember(c, uint64(etcd2.Server.ID()), m, cfg2) + default: + c.Fatalf("unknown member %v", m) + } + } +} diff --git a/pkg/terror/error_list.go b/pkg/terror/error_list.go index bbc29d3396..9b4b300799 100644 --- a/pkg/terror/error_list.go +++ b/pkg/terror/error_list.go @@ -385,6 +385,7 @@ const ( codeMasterGenEmbedEtcdConfigFail codeMasterStartEmbedEtcdFail codeMasterParseURLFail + codeMasterJoinEmbedEtcdFail ) // DM-worker error code @@ -826,6 +827,7 @@ var ( ErrMasterGenEmbedEtcdConfigFail = New(codeMasterGenEmbedEtcdConfigFail, ClassDMMaster, ScopeInternal, LevelHigh, "generate config item %s for embed etcd fail") ErrMasterStartEmbedEtcdFail = New(codeMasterStartEmbedEtcdFail, ClassDMMaster, ScopeInternal, LevelHigh, "start embed etcd fail") ErrMasterParseURLFail = New(codeMasterParseURLFail, ClassDMMaster, ScopeInternal, LevelHigh, "parse URL %s fail") + ErrMasterJoinEmbedEtcdFail = New(codeMasterJoinEmbedEtcdFail, ClassDMMaster, ScopeInternal, LevelHigh, "join embed etcd fail") // DM-worker error ErrWorkerParseFlagSet = New(codeWorkerParseFlagSet, ClassDMWorker, ScopeInternal, LevelMedium, "parse dm-worker config flag set") From 31436e8155c1fac9c59e8a2bc5d78c8399d651a6 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Mon, 11 Nov 2019 11:12:13 +0800 Subject: [PATCH 12/27] master: address comments --- dm/master/config.go | 9 ++++++--- dm/master/config_test.go | 6 +++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/dm/master/config.go b/dm/master/config.go index 5e8c6c1758..c45b2edc1a 100644 --- a/dm/master/config.go +++ b/dm/master/config.go @@ -298,19 +298,19 @@ func (c *Config) genEmbedEtcdConfig() (*embed.Config, error) { // reuse the previous master-addr as the client listening URL. cURL, err := parseURLs(c.MasterAddr) if err != nil { - return nil, terror.ErrMasterGenEmbedEtcdConfigFail.Delegate(err, "--listen-client-urls/--advertise-client-urls") + return nil, terror.ErrMasterGenEmbedEtcdConfigFail.Delegate(err, "invalid master-addr") } cfg.LCUrls = cURL cfg.ACUrls = cURL cfg.LPUrls, err = parseURLs(c.PeerUrls) if err != nil { - return nil, terror.ErrMasterGenEmbedEtcdConfigFail.Delegate(err, "--listen-peer-urls") + return nil, terror.ErrMasterGenEmbedEtcdConfigFail.Delegate(err, "invalid peer-urls") } cfg.APUrls, err = parseURLs(c.AdvertisePeerUrls) if err != nil { - return nil, terror.ErrMasterGenEmbedEtcdConfigFail.Delegate(err, "--initial-advertise-peer-urls") + return nil, terror.ErrMasterGenEmbedEtcdConfigFail.Delegate(err, "invalid advertise-peer-urls") } cfg.InitialCluster = c.InitialCluster @@ -330,6 +330,9 @@ func parseURLs(s string) ([]url.URL, error) { urls := make([]url.URL, 0, len(items)) for _, item := range items { u, err := url.Parse(item) + // tolerate valid `master-addr`, but invalid URL format, like: + // `:8261`: missing protocol scheme + // `127.0.0.1:8261`: first path segment in URL cannot contain colon if err != nil && (strings.Contains(err.Error(), "missing protocol scheme") || strings.Contains(err.Error(), "first path segment in URL cannot contain colon")) { u, err = url.Parse("http://" + item) diff --git a/dm/master/config_test.go b/dm/master/config_test.go index 4b74c7f5be..419962b766 100644 --- a/dm/master/config_test.go +++ b/dm/master/config_test.go @@ -243,7 +243,7 @@ func (t *testConfigSuite) TestGenEmbedEtcdConfig(c *check.C) { cfg2.MasterAddr = "127.0.0.1\n:8261" _, err = cfg2.genEmbedEtcdConfig() c.Assert(terror.ErrMasterGenEmbedEtcdConfigFail.Equal(err), check.IsTrue) - c.Assert(err, check.ErrorMatches, "(?m).*--listen-client-urls/--advertise-client-urls.*") + c.Assert(err, check.ErrorMatches, "(?m).*invalid master-addr.*") cfg2.MasterAddr = "172.100.8.8:8261" etcdCfg, err = cfg2.genEmbedEtcdConfig() c.Assert(err, check.IsNil) @@ -254,7 +254,7 @@ func (t *testConfigSuite) TestGenEmbedEtcdConfig(c *check.C) { cfg3.PeerUrls = "127.0.0.1:\n8269" _, err = cfg3.genEmbedEtcdConfig() c.Assert(terror.ErrMasterGenEmbedEtcdConfigFail.Equal(err), check.IsTrue) - c.Assert(err, check.ErrorMatches, "(?m).*--listen-peer-urls.*") + c.Assert(err, check.ErrorMatches, "(?m).*invalid peer-urls.*") cfg3.PeerUrls = "http://172.100.8.8:8269" etcdCfg, err = cfg3.genEmbedEtcdConfig() c.Assert(err, check.IsNil) @@ -264,7 +264,7 @@ func (t *testConfigSuite) TestGenEmbedEtcdConfig(c *check.C) { cfg4.AdvertisePeerUrls = "127.0.0.1:\n8269" _, err = cfg4.genEmbedEtcdConfig() c.Assert(terror.ErrMasterGenEmbedEtcdConfigFail.Equal(err), check.IsTrue) - c.Assert(err, check.ErrorMatches, "(?m).*--initial-advertise-peer-urls.*") + c.Assert(err, check.ErrorMatches, "(?m).*invalid advertise-peer-urls.*") cfg4.AdvertisePeerUrls = "http://172.100.8.8:8269" etcdCfg, err = cfg4.genEmbedEtcdConfig() c.Assert(err, check.IsNil) From fad73c93f235bf1c5114147b617448fa4d1e66bd Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Mon, 11 Nov 2019 21:29:04 +0800 Subject: [PATCH 13/27] *: add some tests for prepareJoinEtcd --- dm/master/etcd.go | 6 +-- dm/master/etcd_test.go | 103 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 3 deletions(-) create mode 100644 dm/master/etcd_test.go diff --git a/dm/master/etcd.go b/dm/master/etcd.go index 4983aa9dfe..5b3e68b85b 100644 --- a/dm/master/etcd.go +++ b/dm/master/etcd.go @@ -81,19 +81,19 @@ func prepareJoinEtcd(cfg *Config) error { return terror.ErrMasterJoinEmbedEtcdFail.Generatef("join self %s is forbidden", cfg.Join) } - // join with persist data + // join with persistent data joinFP := filepath.Join(cfg.DataDir, "join") if _, err := os.Stat(joinFP); !os.IsNotExist(err) { s, err := ioutil.ReadFile(joinFP) if err != nil { - return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err) + return terror.ErrMasterJoinEmbedEtcdFail.AnnotateDelegate(err, "read persistent join data") } cfg.InitialCluster = strings.TrimSpace(string(s)) cfg.InitialClusterState = embed.ClusterStateFlagExisting return nil } - // restart from previous data, no `InitialCluster` need to set + // restart with previous data, no `InitialCluster` need to set if isDataExist(filepath.Join(cfg.DataDir, "member")) { cfg.InitialCluster = "" cfg.InitialClusterState = embed.ClusterStateFlagExisting diff --git a/dm/master/etcd_test.go b/dm/master/etcd_test.go new file mode 100644 index 0000000000..bb03b01a4c --- /dev/null +++ b/dm/master/etcd_test.go @@ -0,0 +1,103 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package master + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + + "github.com/pingcap/check" + "go.etcd.io/etcd/embed" + + "github.com/pingcap/dm/pkg/terror" +) + +var _ = check.Suite(&testEtcdSuite{}) + +type testEtcdSuite struct { +} + +func (t *testEtcdSuite) TestPrepareJoinEtcd(c *check.C) { + joinCluster := "dm-master-1=http://172.100.100.100:8269" + + cfgBefore := NewConfig() // before `prepareJoinEtcd` applied + cfgBefore.MasterAddr = ":8261" + cfgBefore.DataDir = c.MkDir() + c.Assert(cfgBefore.adjust(), check.IsNil) + cfgAfter := t.cloneConfig(cfgBefore) // after `prepareJoinEtcd applied + joinFP := filepath.Join(cfgBefore.DataDir, "join") + memberDP := filepath.Join(cfgBefore.DataDir, "member") + + // not set `join`, do nothing + c.Assert(prepareJoinEtcd(cfgAfter), check.IsNil) + c.Assert(cfgAfter, check.DeepEquals, cfgBefore) + + // try to join self + cfgAfter.Join = cfgAfter.AdvertisePeerUrls + err := prepareJoinEtcd(cfgAfter) + c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue) + c.Assert(err, check.ErrorMatches, ".*join self.*is forbidden.*") + + // update `join` to a valid item + cfgBefore.Join = joinCluster + + // join with persistent data + c.Assert(ioutil.WriteFile(joinFP, []byte(joinCluster), privateDirMode), check.IsNil) + cfgAfter = t.cloneConfig(cfgBefore) + c.Assert(prepareJoinEtcd(cfgAfter), check.IsNil) + c.Assert(cfgAfter.InitialCluster, check.Equals, joinCluster) + c.Assert(cfgAfter.InitialClusterState, check.Equals, embed.ClusterStateFlagExisting) + c.Assert(os.Remove(joinFP), check.IsNil) // remove the persistent data + + // join with invalid persistent data + c.Assert(os.Mkdir(joinFP, privateDirMode), check.IsNil) // use directory as invalid persistent data (file) + cfgAfter = t.cloneConfig(cfgBefore) + err = prepareJoinEtcd(cfgAfter) + c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue) + c.Assert(err, check.ErrorMatches, ".*read persistent join data.*") + c.Assert(os.Remove(joinFP), check.IsNil) // remove the persistent data + + // restart with previous data + c.Assert(os.Mkdir(memberDP, privateDirMode), check.IsNil) + c.Assert(os.Mkdir(filepath.Join(memberDP, "wal"), privateDirMode), check.IsNil) + c.Assert(prepareJoinEtcd(cfgAfter), check.IsNil) + c.Assert(cfgAfter.InitialCluster, check.Equals, "") + c.Assert(cfgAfter.InitialClusterState, check.Equals, embed.ClusterStateFlagExisting) + c.Assert(os.RemoveAll(memberDP), check.IsNil) // remove previous data +} + +func (t *testEtcdSuite) cloneConfig(cfg *Config) *Config { + clone := NewConfig() + *clone = *cfg + return clone +} + +func (t *testEtcdSuite) TestIsDataExist(c *check.C) { + d := "./directory-not-exists" + c.Assert(isDataExist(d), check.IsFalse) + + // empty directory + d = c.MkDir() + c.Assert(isDataExist(d), check.IsFalse) + + // data exists in the directory + for i := 1; i <= 3; i++ { + fp := filepath.Join(d, fmt.Sprintf("file.%d", i)) + c.Assert(ioutil.WriteFile(fp, nil, privateDirMode), check.IsNil) + c.Assert(isDataExist(d), check.IsTrue) + c.Assert(isDataExist(fp), check.IsFalse) // not a directory + } +} From aee2173253b0feb53cdebde2ad004bc187d5566e Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 12 Nov 2019 12:07:01 +0800 Subject: [PATCH 14/27] *: add more tests for prepareJoinEtcd; tiny fix `AdvertisePeerUrls` adjust --- _utils/terror_gen/errors_release.txt | 2 +- dm/master/config.go | 2 +- dm/master/etcd.go | 22 ++++----- dm/master/etcd_test.go | 70 ++++++++++++++++++++++++---- go.mod | 1 + pkg/terror/error_list.go | 2 +- 6 files changed, 77 insertions(+), 22 deletions(-) diff --git a/_utils/terror_gen/errors_release.txt b/_utils/terror_gen/errors_release.txt index ed32705e41..823b368078 100644 --- a/_utils/terror_gen/errors_release.txt +++ b/_utils/terror_gen/errors_release.txt @@ -316,7 +316,7 @@ ErrMasterGetHostnameFail,[code=38036:class=dm-master:scope=internal:level=high], ErrMasterGenEmbedEtcdConfigFail,[code=38037:class=dm-master:scope=internal:level=high],"generate config item %s for embed etcd fail" ErrMasterStartEmbedEtcdFail,[code=38038:class=dm-master:scope=internal:level=high],"start embed etcd fail" ErrMasterParseURLFail,[code=38039:class=dm-master:scope=internal:level=high],"parse URL %s fail" -ErrMasterJoinEmbedEtcdFail,[code=38040:class=dm-master:scope=internal:level=high],"join embed etcd fail" +ErrMasterJoinEmbedEtcdFail,[code=38040:class=dm-master:scope=internal:level=high],"fail to join embed etcd: %s" ErrWorkerParseFlagSet,[code=40001:class=dm-worker:scope=internal:level=medium],"parse dm-worker config flag set" ErrWorkerInvalidFlag,[code=40002:class=dm-worker:scope=internal:level=medium],"'%s' is an invalid flag" ErrWorkerDecodeConfigFromFile,[code=40003:class=dm-worker:scope=internal:level=medium],"toml decode file" diff --git a/dm/master/config.go b/dm/master/config.go index f1b87d0b8d..6b0922a2e5 100644 --- a/dm/master/config.go +++ b/dm/master/config.go @@ -254,7 +254,7 @@ func (c *Config) adjust() error { } if c.AdvertisePeerUrls == "" { - c.AdvertisePeerUrls = defaultPeerUrls + c.AdvertisePeerUrls = c.PeerUrls } if c.InitialCluster == "" { diff --git a/dm/master/etcd.go b/dm/master/etcd.go index 5b3e68b85b..567809225a 100644 --- a/dm/master/etcd.go +++ b/dm/master/etcd.go @@ -78,7 +78,7 @@ func prepareJoinEtcd(cfg *Config) error { // try to join self, invalid if cfg.Join == cfg.AdvertisePeerUrls { - return terror.ErrMasterJoinEmbedEtcdFail.Generatef("join self %s is forbidden", cfg.Join) + return terror.ErrMasterJoinEmbedEtcdFail.Generate(fmt.Sprintf("join self %s is forbidden", cfg.Join)) } // join with persistent data @@ -86,7 +86,7 @@ func prepareJoinEtcd(cfg *Config) error { if _, err := os.Stat(joinFP); !os.IsNotExist(err) { s, err := ioutil.ReadFile(joinFP) if err != nil { - return terror.ErrMasterJoinEmbedEtcdFail.AnnotateDelegate(err, "read persistent join data") + return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err, "read persistent join data") } cfg.InitialCluster = strings.TrimSpace(string(s)) cfg.InitialClusterState = embed.ClusterStateFlagExisting @@ -106,31 +106,31 @@ func prepareJoinEtcd(cfg *Config) error { DialTimeout: etcdutil.DefaultDialTimeout, }) if err != nil { - return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err) + return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err, fmt.Sprintf("create etcd client for %s", cfg.Join)) } defer client.Close() // `member list` listResp, err := etcdutil.ListMembers(client) if err != nil { - return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err) + return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err, fmt.Sprintf("list member for %s", cfg.Join)) } // check members for _, m := range listResp.Members { if m.Name == "" { - return terror.ErrMasterJoinEmbedEtcdFail.New("there is a member that has not joined successfully") + return terror.ErrMasterJoinEmbedEtcdFail.Generate("there is a member that has not joined successfully") } if m.Name == cfg.Name { // a failed DM-master re-joins the previous cluster. - return terror.ErrMasterJoinEmbedEtcdFail.Generatef("missing data or joining a duplicated dm-master %s", m.Name) + return terror.ErrMasterJoinEmbedEtcdFail.Generate(fmt.Sprintf("missing data or joining a duplicate member %s", m.Name)) } } // `member add`, a new/deleted DM-master joins to an existing cluster. addResp, err := etcdutil.AddMember(client, strings.Split(cfg.AdvertisePeerUrls, ",")) if err != nil { - return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err) + return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err, fmt.Sprintf("add member %s", cfg.AdvertisePeerUrls)) } // generate `--initial-cluster` @@ -141,7 +141,7 @@ func prepareJoinEtcd(cfg *Config) error { name = cfg.Name } if name == "" { - return terror.ErrMasterJoinEmbedEtcdFail.New("there is a member that has not joined successfully") + return terror.ErrMasterJoinEmbedEtcdFail.Generate("there is a member that has not joined successfully") } for _, url := range m.PeerURLs { ms = append(ms, fmt.Sprintf("%s=%s", name, url)) @@ -151,11 +151,11 @@ func prepareJoinEtcd(cfg *Config) error { cfg.InitialClusterState = embed.ClusterStateFlagExisting // save `--initial-cluster` in persist data - if err = os.MkdirAll(cfg.DataDir, privateDirMode); !os.IsExist(err) { - return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err) + if err = os.MkdirAll(cfg.DataDir, privateDirMode); err != nil && !os.IsExist(err) { + return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err, "make directory") } if err = ioutil.WriteFile(joinFP, []byte(cfg.InitialCluster), privateDirMode); err != nil { - return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err) + return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err, "write persistent join data") } return nil diff --git a/dm/master/etcd_test.go b/dm/master/etcd_test.go index bb03b01a4c..55b807c164 100644 --- a/dm/master/etcd_test.go +++ b/dm/master/etcd_test.go @@ -18,8 +18,11 @@ import ( "io/ioutil" "os" "path/filepath" + "sort" + "strings" "github.com/pingcap/check" + "github.com/pingcap/pd/pkg/tempurl" "go.etcd.io/etcd/embed" "github.com/pingcap/dm/pkg/terror" @@ -31,13 +34,23 @@ type testEtcdSuite struct { } func (t *testEtcdSuite) TestPrepareJoinEtcd(c *check.C) { - joinCluster := "dm-master-1=http://172.100.100.100:8269" - - cfgBefore := NewConfig() // before `prepareJoinEtcd` applied - cfgBefore.MasterAddr = ":8261" - cfgBefore.DataDir = c.MkDir() + cfgCluster := NewConfig() // used to start an etcd cluster + cfgCluster.Name = "dm-master-1" + cfgCluster.DataDir = c.MkDir() + cfgCluster.MasterAddr = tempurl.Alloc()[len("http://"):] + cfgCluster.PeerUrls = tempurl.Alloc() + c.Assert(cfgCluster.adjust(), check.IsNil) + + cfgBefore := t.cloneConfig(cfgCluster) // before `prepareJoinEtcd` applied + cfgBefore.DataDir = c.MkDir() // overwrite some config items + cfgBefore.MasterAddr = tempurl.Alloc()[len("http://"):] + cfgBefore.PeerUrls = tempurl.Alloc() + cfgBefore.AdvertisePeerUrls = cfgBefore.PeerUrls c.Assert(cfgBefore.adjust(), check.IsNil) + cfgAfter := t.cloneConfig(cfgBefore) // after `prepareJoinEtcd applied + + joinCluster := cfgCluster.PeerUrls joinFP := filepath.Join(cfgBefore.DataDir, "join") memberDP := filepath.Join(cfgBefore.DataDir, "member") @@ -49,7 +62,7 @@ func (t *testEtcdSuite) TestPrepareJoinEtcd(c *check.C) { cfgAfter.Join = cfgAfter.AdvertisePeerUrls err := prepareJoinEtcd(cfgAfter) c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue) - c.Assert(err, check.ErrorMatches, ".*join self.*is forbidden.*") + c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: join self.*is forbidden.*") // update `join` to a valid item cfgBefore.Join = joinCluster @@ -67,8 +80,9 @@ func (t *testEtcdSuite) TestPrepareJoinEtcd(c *check.C) { cfgAfter = t.cloneConfig(cfgBefore) err = prepareJoinEtcd(cfgAfter) c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue) - c.Assert(err, check.ErrorMatches, ".*read persistent join data.*") - c.Assert(os.Remove(joinFP), check.IsNil) // remove the persistent data + c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: read persistent join data.*") + c.Assert(os.Remove(joinFP), check.IsNil) // remove the persistent data + c.Assert(cfgAfter, check.DeepEquals, cfgBefore) // not changed // restart with previous data c.Assert(os.Mkdir(memberDP, privateDirMode), check.IsNil) @@ -77,6 +91,46 @@ func (t *testEtcdSuite) TestPrepareJoinEtcd(c *check.C) { c.Assert(cfgAfter.InitialCluster, check.Equals, "") c.Assert(cfgAfter.InitialClusterState, check.Equals, embed.ClusterStateFlagExisting) c.Assert(os.RemoveAll(memberDP), check.IsNil) // remove previous data + + // start an etcd cluster + e, err := startEtcd(cfgCluster, nil, nil) + c.Assert(err, check.IsNil) + defer e.Close() + + // same `name`, duplicate + cfgAfter = t.cloneConfig(cfgBefore) + err = prepareJoinEtcd(cfgAfter) + c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue) + c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: missing data or joining a duplicate member.*") + c.Assert(cfgAfter, check.DeepEquals, cfgBefore) // not changed + + // set a different name + cfgBefore.Name = "dm-master-2" + + // add member with invalid `advertise-peer-urls` + cfgAfter = t.cloneConfig(cfgBefore) + cfgAfter.AdvertisePeerUrls = "invalid-advertise-peer-urls" + err = prepareJoinEtcd(cfgAfter) + c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue) + c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: add member.*") + + // join with existing cluster + cfgAfter = t.cloneConfig(cfgBefore) + c.Assert(prepareJoinEtcd(cfgAfter), check.IsNil) + c.Assert(cfgAfter.InitialClusterState, check.Equals, embed.ClusterStateFlagExisting) + obtainClusters := strings.Split(cfgAfter.InitialCluster, ",") + sort.Strings(obtainClusters) + expectedClusters := []string{ + cfgCluster.InitialCluster, + fmt.Sprintf("%s=%s", cfgAfter.Name, cfgAfter.PeerUrls), + } + sort.Strings(expectedClusters) + c.Assert(obtainClusters, check.DeepEquals, expectedClusters) + + // join data should exist now + joinData, err := ioutil.ReadFile(joinFP) + c.Assert(err, check.IsNil) + c.Assert(string(joinData), check.Equals, cfgAfter.InitialCluster) } func (t *testEtcdSuite) cloneConfig(cfg *Config) *Config { diff --git a/go.mod b/go.mod index ad0a3b8156..e061fb0bd3 100644 --- a/go.mod +++ b/go.mod @@ -36,6 +36,7 @@ require ( github.com/pingcap/kvproto v0.0.0-20190827032240-9696cd0c6acb // indirect github.com/pingcap/log v0.0.0-20190715063458-479153f07ebd github.com/pingcap/parser v0.0.0-20191008032157-51a2e3b2e34b + github.com/pingcap/pd v0.0.0-20190712044914-75a1f9f3062b github.com/pingcap/tidb v0.0.0-20190827060935-cc07b110825e github.com/pingcap/tidb-tools v3.0.0-beta.1.0.20190821032033-e6ccf3994944+incompatible github.com/pingcap/tipb v0.0.0-20191008064422-018b2fadf414 // indirect diff --git a/pkg/terror/error_list.go b/pkg/terror/error_list.go index 9b4b300799..bf6b5900ac 100644 --- a/pkg/terror/error_list.go +++ b/pkg/terror/error_list.go @@ -827,7 +827,7 @@ var ( ErrMasterGenEmbedEtcdConfigFail = New(codeMasterGenEmbedEtcdConfigFail, ClassDMMaster, ScopeInternal, LevelHigh, "generate config item %s for embed etcd fail") ErrMasterStartEmbedEtcdFail = New(codeMasterStartEmbedEtcdFail, ClassDMMaster, ScopeInternal, LevelHigh, "start embed etcd fail") ErrMasterParseURLFail = New(codeMasterParseURLFail, ClassDMMaster, ScopeInternal, LevelHigh, "parse URL %s fail") - ErrMasterJoinEmbedEtcdFail = New(codeMasterJoinEmbedEtcdFail, ClassDMMaster, ScopeInternal, LevelHigh, "join embed etcd fail") + ErrMasterJoinEmbedEtcdFail = New(codeMasterJoinEmbedEtcdFail, ClassDMMaster, ScopeInternal, LevelHigh, "fail to join embed etcd: %s") // DM-worker error ErrWorkerParseFlagSet = New(codeWorkerParseFlagSet, ClassDMWorker, ScopeInternal, LevelMedium, "parse dm-worker config flag set") From abec6e2fe4653d8e894d13b0115b3f22442b8fb8 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 12 Nov 2019 14:33:17 +0800 Subject: [PATCH 15/27] master: address comment --- dm/master/config.go | 2 +- dm/master/config_test.go | 54 ++++++++++++++++++++-------------------- dm/master/dm-master.toml | 8 +++--- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/dm/master/config.go b/dm/master/config.go index c45b2edc1a..c895f540d2 100644 --- a/dm/master/config.go +++ b/dm/master/config.go @@ -38,7 +38,7 @@ const ( defaultRPCTimeout = "30s" defaultNamePrefix = "dm-master" defaultDataDirPrefix = "default" - defaultPeerUrls = "http://127.0.0.1:8269" + defaultPeerUrls = "http://127.0.0.1:8291" ) // SampleConfigFile is sample config file of dm-master diff --git a/dm/master/config_test.go b/dm/master/config_test.go index 419962b766..553c400fb6 100644 --- a/dm/master/config_test.go +++ b/dm/master/config_test.go @@ -80,9 +80,9 @@ func (t *testConfigSuite) TestConfig(c *check.C) { masterAddr = ":8261" name = "dm-master" dataDir = "default.dm-master" - peerURLs = "http://127.0.0.1:8269" - advertisePeerURLs = "http://127.0.0.1:8269" - initialCluster = "dm-master=http://127.0.0.1:8269" + peerURLs = "http://127.0.0.1:8291" + advertisePeerURLs = "http://127.0.0.1:8291" + initialCluster = "dm-master=http://127.0.0.1:8291" deployMap = map[string]string{ "mysql-replica-01": "172.16.10.72:8262", "mysql-replica-02": "172.16.10.73:8262", @@ -235,9 +235,9 @@ func (t *testConfigSuite) TestGenEmbedEtcdConfig(c *check.C) { c.Assert(etcdCfg.Dir, check.Equals, fmt.Sprintf("default.%s", etcdCfg.Name)) c.Assert(etcdCfg.LCUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "0.0.0.0:8261"}}) c.Assert(etcdCfg.ACUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "0.0.0.0:8261"}}) - c.Assert(etcdCfg.LPUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "127.0.0.1:8269"}}) - c.Assert(etcdCfg.APUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "127.0.0.1:8269"}}) - c.Assert(etcdCfg.InitialCluster, check.DeepEquals, fmt.Sprintf("dm-master-%s=http://127.0.0.1:8269", hostname)) + c.Assert(etcdCfg.LPUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "127.0.0.1:8291"}}) + c.Assert(etcdCfg.APUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "127.0.0.1:8291"}}) + c.Assert(etcdCfg.InitialCluster, check.DeepEquals, fmt.Sprintf("dm-master-%s=http://127.0.0.1:8291", hostname)) cfg2 := *cfg1 cfg2.MasterAddr = "127.0.0.1\n:8261" @@ -251,24 +251,24 @@ func (t *testConfigSuite) TestGenEmbedEtcdConfig(c *check.C) { c.Assert(etcdCfg.ACUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "172.100.8.8:8261"}}) cfg3 := *cfg1 - cfg3.PeerUrls = "127.0.0.1:\n8269" + cfg3.PeerUrls = "127.0.0.1:\n8291" _, err = cfg3.genEmbedEtcdConfig() c.Assert(terror.ErrMasterGenEmbedEtcdConfigFail.Equal(err), check.IsTrue) c.Assert(err, check.ErrorMatches, "(?m).*invalid peer-urls.*") - cfg3.PeerUrls = "http://172.100.8.8:8269" + cfg3.PeerUrls = "http://172.100.8.8:8291" etcdCfg, err = cfg3.genEmbedEtcdConfig() c.Assert(err, check.IsNil) - c.Assert(etcdCfg.LPUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "172.100.8.8:8269"}}) + c.Assert(etcdCfg.LPUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "172.100.8.8:8291"}}) cfg4 := *cfg1 - cfg4.AdvertisePeerUrls = "127.0.0.1:\n8269" + cfg4.AdvertisePeerUrls = "127.0.0.1:\n8291" _, err = cfg4.genEmbedEtcdConfig() c.Assert(terror.ErrMasterGenEmbedEtcdConfigFail.Equal(err), check.IsTrue) c.Assert(err, check.ErrorMatches, "(?m).*invalid advertise-peer-urls.*") - cfg4.AdvertisePeerUrls = "http://172.100.8.8:8269" + cfg4.AdvertisePeerUrls = "http://172.100.8.8:8291" etcdCfg, err = cfg4.genEmbedEtcdConfig() c.Assert(err, check.IsNil) - c.Assert(etcdCfg.APUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "172.100.8.8:8269"}}) + c.Assert(etcdCfg.APUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "172.100.8.8:8291"}}) } func (t *testConfigSuite) TestParseURLs(c *check.C) { @@ -279,41 +279,41 @@ func (t *testConfigSuite) TestParseURLs(c *check.C) { }{ {}, // empty str { - str: "http://127.0.0.1:8269", - urls: []url.URL{{Scheme: "http", Host: "127.0.0.1:8269"}}, + str: "http://127.0.0.1:8291", + urls: []url.URL{{Scheme: "http", Host: "127.0.0.1:8291"}}, }, { - str: "http://127.0.0.1:8269,http://127.0.0.1:18269", + str: "http://127.0.0.1:8291,http://127.0.0.1:18291", urls: []url.URL{ - {Scheme: "http", Host: "127.0.0.1:8269"}, - {Scheme: "http", Host: "127.0.0.1:18269"}, + {Scheme: "http", Host: "127.0.0.1:8291"}, + {Scheme: "http", Host: "127.0.0.1:18291"}, }, }, { - str: "127.0.0.1:8269", // no scheme - urls: []url.URL{{Scheme: "http", Host: "127.0.0.1:8269"}}, + str: "127.0.0.1:8291", // no scheme + urls: []url.URL{{Scheme: "http", Host: "127.0.0.1:8291"}}, }, { - str: "http://:8269", // no IP - urls: []url.URL{{Scheme: "http", Host: "0.0.0.0:8269"}}, + str: "http://:8291", // no IP + urls: []url.URL{{Scheme: "http", Host: "0.0.0.0:8291"}}, }, { - str: ":8269", // no scheme, no IP - urls: []url.URL{{Scheme: "http", Host: "0.0.0.0:8269"}}, + str: ":8291", // no scheme, no IP + urls: []url.URL{{Scheme: "http", Host: "0.0.0.0:8291"}}, }, { str: "http://", // no IP, no port urls: []url.URL{{Scheme: "http", Host: ""}}, }, { - str: "http://\n127.0.0.1:8269", // invalid char in URL + str: "http://\n127.0.0.1:8291", // invalid char in URL hasErr: true, }, { - str: ":8269,http://127.0.0.1:18269", + str: ":8291,http://127.0.0.1:18291", urls: []url.URL{ - {Scheme: "http", Host: "0.0.0.0:8269"}, - {Scheme: "http", Host: "127.0.0.1:18269"}, + {Scheme: "http", Host: "0.0.0.0:8291"}, + {Scheme: "http", Host: "127.0.0.1:18291"}, }, }, } diff --git a/dm/master/dm-master.toml b/dm/master/dm-master.toml index 7209aee900..bcd08b20a3 100644 --- a/dm/master/dm-master.toml +++ b/dm/master/dm-master.toml @@ -14,13 +14,13 @@ name = "dm-master" data-dir = "default.dm-master" # URLs for peer traffic -peer-urls = "http://127.0.0.1:8269" +peer-urls = "http://127.0.0.1:8291" # advertise URLs for peer traffic (default '${peer-urls}') -advertise-peer-urls = "http://127.0.0.1:8269" +advertise-peer-urls = "http://127.0.0.1:8291" -# initial cluster configuration for bootstrapping, e,g. dm-master=http://127.0.0.1:8269 -initial-cluster = "dm-master=http://127.0.0.1:8269" +# initial cluster configuration for bootstrapping, e,g. dm-master=http://127.0.0.1:8291 +initial-cluster = "dm-master=http://127.0.0.1:8291" # rpc configuration # From a67bcf93a1d760a26269c63b579af4243473784c Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 12 Nov 2019 16:28:05 +0800 Subject: [PATCH 16/27] master: address comments --- dm/master/etcd.go | 10 +++++++--- dm/master/server.go | 8 ++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/dm/master/etcd.go b/dm/master/etcd.go index a743664a0b..398b3be1cc 100644 --- a/dm/master/etcd.go +++ b/dm/master/etcd.go @@ -23,6 +23,11 @@ import ( "github.com/pingcap/dm/pkg/terror" ) +const ( + // time waiting for etcd to be started + etcdStartTimeout = time.Minute +) + // startEtcd starts an embedded etcd server. func startEtcd(masterCfg *Config, gRPCSvr func(*grpc.Server), @@ -45,13 +50,12 @@ func startEtcd(masterCfg *Config, return nil, terror.ErrMasterStartEmbedEtcdFail.Delegate(err) } - timeout := time.Minute select { case <-e.Server.ReadyNotify(): - case <-time.After(timeout): + case <-time.After(etcdStartTimeout): e.Server.Stop() e.Close() - return nil, terror.ErrMasterStartEmbedEtcdFail.Generatef("start embed etcd timeout %v", timeout) + return nil, terror.ErrMasterStartEmbedEtcdFail.Generatef("start embed etcd timeout %v", etcdStartTimeout) } return e, nil } diff --git a/dm/master/server.go b/dm/master/server.go index 5f6f315632..58ef63a518 100644 --- a/dm/master/server.go +++ b/dm/master/server.go @@ -99,14 +99,14 @@ func (s *Server) Start(ctx context.Context) (err error) { for _, workerAddr := range s.cfg.DeployMap { s.workerClients[workerAddr], err = workerrpc.NewGRPCClient(workerAddr) if err != nil { - return err + return } } // get an HTTP to gRPC API handler. apiHandler, err := getHTTPAPIHandler(ctx, s.cfg.MasterAddr) if err != nil { - return err + return } // HTTP handlers on etcd's client IP:port @@ -129,7 +129,7 @@ func (s *Server) Start(ctx context.Context) (err error) { // start embed etcd server, gRPC API server and HTTP (API, status and debug) server. s.etcd, err = startEtcd(s.cfg, gRPCSvr, userHandles) if err != nil { - return err + return } s.closed.Set(false) // the server started now. @@ -160,7 +160,7 @@ func (s *Server) Start(ctx context.Context) (err error) { }() log.L().Info("listening gRPC API and status request", zap.String("address", s.cfg.MasterAddr)) - return nil + return } // Close close the RPC server, this function can be called multiple times From 6255adcca1c4071910cc74ffdc99dc6564297851 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 12 Nov 2019 17:29:17 +0800 Subject: [PATCH 17/27] master: fix merge --- dm/master/etcd.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dm/master/etcd.go b/dm/master/etcd.go index 5ddeff7ec8..fe5e4f0e73 100644 --- a/dm/master/etcd.go +++ b/dm/master/etcd.go @@ -17,8 +17,12 @@ import ( "fmt" "io/ioutil" "net/http" + "os" + "path/filepath" + "strings" "time" + "go.etcd.io/etcd/clientv3" "go.etcd.io/etcd/embed" "google.golang.org/grpc" From d30a8a780dd30e0db7a45e38e7b071e309e6d791 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 12 Nov 2019 17:35:54 +0800 Subject: [PATCH 18/27] *: go mod tidy --- go.mod | 1 + go.sum | 2 ++ 2 files changed, 3 insertions(+) diff --git a/go.mod b/go.mod index e061fb0bd3..d9defea3f5 100644 --- a/go.mod +++ b/go.mod @@ -73,6 +73,7 @@ require ( google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55 google.golang.org/grpc v1.23.0 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect + gopkg.in/stretchr/testify.v1 v1.2.2 // indirect gopkg.in/yaml.v2 v2.2.4 sigs.k8s.io/yaml v1.1.0 // indirect sourcegraph.com/sourcegraph/appdash v0.0.0-20190731080439-ebfcffb1b5c0 // indirect diff --git a/go.sum b/go.sum index 7dbe5cb5e8..55bd2239d9 100644 --- a/go.sum +++ b/go.sum @@ -533,6 +533,8 @@ gopkg.in/gemnasium/logrus-airbrake-hook.v2 v2.1.2/go.mod h1:Xk6kEKp8OKb+X14hQBKW gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8= gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= +gopkg.in/stretchr/testify.v1 v1.2.2 h1:yhQC6Uy5CqibAIlk1wlusa/MJ3iAN49/BsR/dCCKz3M= +gopkg.in/stretchr/testify.v1 v1.2.2/go.mod h1:QI5V/q6UbPmuhtm10CaFZxED9NreB8PnFYN9JcR6TxU= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= From aacb1ed6e07aed7e7e9be54347689cf80e530b28 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 12 Nov 2019 17:46:47 +0800 Subject: [PATCH 19/27] master: use random port for testing --- dm/master/server_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dm/master/server_test.go b/dm/master/server_test.go index 7e4cedcd69..41e549ed44 100644 --- a/dm/master/server_test.go +++ b/dm/master/server_test.go @@ -36,6 +36,7 @@ import ( "github.com/pingcap/dm/dm/pbmock" "github.com/pingcap/dm/pkg/terror" "github.com/pingcap/dm/pkg/utils" + "github.com/pingcap/pd/pkg/tempurl" ) // use task config from integration test `sharding` @@ -1489,7 +1490,7 @@ func (t *testMaster) TestServer(c *check.C) { cfg := NewConfig() c.Assert(cfg.Parse([]string{"-config=./dm-master.toml"}), check.IsNil) cfg.DataDir = c.MkDir() - cfg.MasterAddr = "127.0.0.1:18261" // use a different port + cfg.MasterAddr = tempurl.Alloc()[len("http://"):] s := NewServer(cfg) From 2a042e3f1f563e8d609617a5f9573f39cdffb609 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 12 Nov 2019 17:51:24 +0800 Subject: [PATCH 20/27] terror: update error message --- _utils/terror_gen/errors_release.txt | 6 +++--- pkg/terror/error_list.go | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/_utils/terror_gen/errors_release.txt b/_utils/terror_gen/errors_release.txt index 823b368078..68048ea01b 100644 --- a/_utils/terror_gen/errors_release.txt +++ b/_utils/terror_gen/errors_release.txt @@ -313,9 +313,9 @@ ErrMasterOperRequestTimeout,[code=38033:class=dm-master:scope=internal:level=hig ErrMasterHandleHTTPApis,[code=38034:class=dm-master:scope=internal:level=high],"serve http apis to grpc" ErrMasterHostPortNotValid,[code=38035:class=dm-master:scope=internal:level=high],"host:port '%s' not valid" ErrMasterGetHostnameFail,[code=38036:class=dm-master:scope=internal:level=high],"get hostname fail" -ErrMasterGenEmbedEtcdConfigFail,[code=38037:class=dm-master:scope=internal:level=high],"generate config item %s for embed etcd fail" -ErrMasterStartEmbedEtcdFail,[code=38038:class=dm-master:scope=internal:level=high],"start embed etcd fail" -ErrMasterParseURLFail,[code=38039:class=dm-master:scope=internal:level=high],"parse URL %s fail" +ErrMasterGenEmbedEtcdConfigFail,[code=38037:class=dm-master:scope=internal:level=high],"fail to generate config item %s for embed etcd" +ErrMasterStartEmbedEtcdFail,[code=38038:class=dm-master:scope=internal:level=high],"fail to start embed etcd" +ErrMasterParseURLFail,[code=38039:class=dm-master:scope=internal:level=high],"fail to parse URL %s" ErrMasterJoinEmbedEtcdFail,[code=38040:class=dm-master:scope=internal:level=high],"fail to join embed etcd: %s" ErrWorkerParseFlagSet,[code=40001:class=dm-worker:scope=internal:level=medium],"parse dm-worker config flag set" ErrWorkerInvalidFlag,[code=40002:class=dm-worker:scope=internal:level=medium],"'%s' is an invalid flag" diff --git a/pkg/terror/error_list.go b/pkg/terror/error_list.go index bf6b5900ac..17215b13c8 100644 --- a/pkg/terror/error_list.go +++ b/pkg/terror/error_list.go @@ -824,9 +824,9 @@ var ( ErrMasterHandleHTTPApis = New(codeMasterHandleHTTPApis, ClassDMMaster, ScopeInternal, LevelHigh, "serve http apis to grpc") ErrMasterHostPortNotValid = New(codeMasterHostPortNotValid, ClassDMMaster, ScopeInternal, LevelHigh, "host:port '%s' not valid") ErrMasterGetHostnameFail = New(codeMasterGetHostnameFail, ClassDMMaster, ScopeInternal, LevelHigh, "get hostname fail") - ErrMasterGenEmbedEtcdConfigFail = New(codeMasterGenEmbedEtcdConfigFail, ClassDMMaster, ScopeInternal, LevelHigh, "generate config item %s for embed etcd fail") - ErrMasterStartEmbedEtcdFail = New(codeMasterStartEmbedEtcdFail, ClassDMMaster, ScopeInternal, LevelHigh, "start embed etcd fail") - ErrMasterParseURLFail = New(codeMasterParseURLFail, ClassDMMaster, ScopeInternal, LevelHigh, "parse URL %s fail") + ErrMasterGenEmbedEtcdConfigFail = New(codeMasterGenEmbedEtcdConfigFail, ClassDMMaster, ScopeInternal, LevelHigh, "fail to generate config item %s for embed etcd") + ErrMasterStartEmbedEtcdFail = New(codeMasterStartEmbedEtcdFail, ClassDMMaster, ScopeInternal, LevelHigh, "fail to start embed etcd") + ErrMasterParseURLFail = New(codeMasterParseURLFail, ClassDMMaster, ScopeInternal, LevelHigh, "fail to parse URL %s") ErrMasterJoinEmbedEtcdFail = New(codeMasterJoinEmbedEtcdFail, ClassDMMaster, ScopeInternal, LevelHigh, "fail to join embed etcd: %s") // DM-worker error From a388e33892fd9ef1b1ca796683b01d5cd60b6a26 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 12 Nov 2019 18:03:57 +0800 Subject: [PATCH 21/27] master: update sample config --- dm/master/dm-master.toml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dm/master/dm-master.toml b/dm/master/dm-master.toml index bcd08b20a3..dcae21d803 100644 --- a/dm/master/dm-master.toml +++ b/dm/master/dm-master.toml @@ -22,6 +22,15 @@ advertise-peer-urls = "http://127.0.0.1:8291" # initial cluster configuration for bootstrapping, e,g. dm-master=http://127.0.0.1:8291 initial-cluster = "dm-master=http://127.0.0.1:8291" +# Initial cluster state ("new" or "existing"). +# Set to "new" for all members present during initial static or DNS bootstrapping. +# If this option is set to "existing", DM-master will attempt to join the existing cluster. +# If the wrong value is set, DM-master will attempt to start but fail safely. +initial-cluster-state = "new" + +# Join to an existing pd cluster, a string of existing cluster's endpoints. +join = "" + # rpc configuration # # rpc timeout is a positive number plus time unit. we use golang standard time From 6fa6bf29220dbd6166e0a7afe298dbfabfa9a0a6 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 12 Nov 2019 19:29:47 +0800 Subject: [PATCH 22/27] master: update config test --- dm/master/config.go | 1 + dm/master/config_test.go | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/dm/master/config.go b/dm/master/config.go index e5e067ef55..8bfb1d8bd6 100644 --- a/dm/master/config.go +++ b/dm/master/config.go @@ -322,6 +322,7 @@ func (c *Config) genEmbedEtcdConfig() (*embed.Config, error) { } cfg.InitialCluster = c.InitialCluster + cfg.ClusterState = c.InitialClusterState return cfg, nil } diff --git a/dm/master/config_test.go b/dm/master/config_test.go index 553c400fb6..c887f0e562 100644 --- a/dm/master/config_test.go +++ b/dm/master/config_test.go @@ -25,6 +25,7 @@ import ( capturer "github.com/kami-zh/go-capturer" "github.com/pingcap/check" + "go.etcd.io/etcd/embed" "github.com/pingcap/dm/pkg/terror" ) @@ -133,6 +134,8 @@ func (t *testConfigSuite) TestConfig(c *check.C) { c.Assert(cfg.PeerUrls, check.Equals, peerURLs) c.Assert(cfg.AdvertisePeerUrls, check.Equals, advertisePeerURLs) c.Assert(cfg.InitialCluster, check.Equals, initialCluster) + c.Assert(cfg.InitialClusterState, check.Equals, embed.ClusterStateFlagNew) + c.Assert(cfg.Join, check.Equals, "") c.Assert(cfg.DeployMap, check.DeepEquals, deployMap) c.Assert(cfg.String(), check.Matches, fmt.Sprintf("{.*master-addr\":\"%s\".*}", masterAddr)) } @@ -228,6 +231,7 @@ func (t *testConfigSuite) TestGenEmbedEtcdConfig(c *check.C) { cfg1 := NewConfig() cfg1.MasterAddr = ":8261" + cfg1.InitialClusterState = embed.ClusterStateFlagExisting c.Assert(cfg1.adjust(), check.IsNil) etcdCfg, err := cfg1.genEmbedEtcdConfig() c.Assert(err, check.IsNil) @@ -238,6 +242,7 @@ func (t *testConfigSuite) TestGenEmbedEtcdConfig(c *check.C) { c.Assert(etcdCfg.LPUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "127.0.0.1:8291"}}) c.Assert(etcdCfg.APUrls, check.DeepEquals, []url.URL{{Scheme: "http", Host: "127.0.0.1:8291"}}) c.Assert(etcdCfg.InitialCluster, check.DeepEquals, fmt.Sprintf("dm-master-%s=http://127.0.0.1:8291", hostname)) + c.Assert(etcdCfg.ClusterState, check.Equals, embed.ClusterStateFlagExisting) cfg2 := *cfg1 cfg2.MasterAddr = "127.0.0.1\n:8261" From cdc9ef39148629d9417f6300299c709ad2c48c2d Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 12 Nov 2019 21:22:58 +0800 Subject: [PATCH 23/27] master: add more tests for prepareJoinEtcd --- dm/master/etcd_test.go | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/dm/master/etcd_test.go b/dm/master/etcd_test.go index 55b807c164..a1e3bdf8c0 100644 --- a/dm/master/etcd_test.go +++ b/dm/master/etcd_test.go @@ -20,6 +20,7 @@ import ( "path/filepath" "sort" "strings" + "time" "github.com/pingcap/check" "github.com/pingcap/pd/pkg/tempurl" @@ -93,9 +94,9 @@ func (t *testEtcdSuite) TestPrepareJoinEtcd(c *check.C) { c.Assert(os.RemoveAll(memberDP), check.IsNil) // remove previous data // start an etcd cluster - e, err := startEtcd(cfgCluster, nil, nil) + e1, err := startEtcd(cfgCluster, nil, nil) c.Assert(err, check.IsNil) - defer e.Close() + defer e1.Close() // same `name`, duplicate cfgAfter = t.cloneConfig(cfgBefore) @@ -131,6 +132,35 @@ func (t *testEtcdSuite) TestPrepareJoinEtcd(c *check.C) { joinData, err := ioutil.ReadFile(joinFP) c.Assert(err, check.IsNil) c.Assert(string(joinData), check.Equals, cfgAfter.InitialCluster) + + // prepare join done, but has not start the etcd to complete the join, can not join anymore. + cfgAfter2 := t.cloneConfig(cfgBefore) + cfgAfter2.Name = "dm-master-3" // overwrite some items + cfgAfter2.DataDir = c.MkDir() + cfgAfter2.MasterAddr = tempurl.Alloc()[len("http://"):] + cfgAfter2.PeerUrls = tempurl.Alloc() + cfgAfter2.AdvertisePeerUrls = cfgAfter2.PeerUrls + err = prepareJoinEtcd(cfgAfter2) + c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue) + c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: there is a member that has not joined successfully.*") + + // start the joining etcd + e2, err := startEtcd(cfgAfter, nil, nil) + c.Assert(err, check.IsNil) + defer e2.Close() + + // try join again + for i := 0; i < 10; i++ { + err = prepareJoinEtcd(cfgAfter2) + if err == nil { + break + } + // for `etcdserver: unhealthy cluster`, try again later + c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue) + c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: add member.*: etcdserver: unhealthy cluster.*") + time.Sleep(time.Second) + } + c.Assert(err, check.IsNil) } func (t *testEtcdSuite) cloneConfig(cfg *Config) *Config { From 5fd202f28d35830343dc734d18c7d3ea2faff1be Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Tue, 12 Nov 2019 22:51:10 +0800 Subject: [PATCH 24/27] master: join member into existing cluster --- dm/master/server.go | 5 ++++ dm/master/server_test.go | 59 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/dm/master/server.go b/dm/master/server.go index 58ef63a518..cfb1737063 100644 --- a/dm/master/server.go +++ b/dm/master/server.go @@ -95,6 +95,11 @@ func NewServer(cfg *Config) *Server { // Start starts to serving func (s *Server) Start(ctx context.Context) (err error) { + err = prepareJoinEtcd(s.cfg) + if err != nil { + return + } + // create clients to DM-workers for _, workerAddr := range s.cfg.DeployMap { s.workerClients[workerAddr], err = workerrpc.NewGRPCClient(workerAddr) diff --git a/dm/master/server_test.go b/dm/master/server_test.go index 41e549ed44..0f1e6ec3fb 100644 --- a/dm/master/server_test.go +++ b/dm/master/server_test.go @@ -18,6 +18,7 @@ import ( "context" "io/ioutil" "net/http" + "strings" "fmt" "io" @@ -28,15 +29,17 @@ import ( "github.com/golang/mock/gomock" "github.com/pingcap/check" "github.com/pingcap/errors" + "github.com/pingcap/pd/pkg/tempurl" + "go.etcd.io/etcd/clientv3" "github.com/pingcap/dm/checker" "github.com/pingcap/dm/dm/config" "github.com/pingcap/dm/dm/master/workerrpc" "github.com/pingcap/dm/dm/pb" "github.com/pingcap/dm/dm/pbmock" + "github.com/pingcap/dm/pkg/etcdutil" "github.com/pingcap/dm/pkg/terror" "github.com/pingcap/dm/pkg/utils" - "github.com/pingcap/pd/pkg/tempurl" ) // use task config from integration test `sharding` @@ -1526,3 +1529,57 @@ func (t *testMaster) testHTTPInterface(c *check.C, url string, contain []byte) { c.Assert(err, check.IsNil) c.Assert(bytes.Contains(body, contain), check.IsTrue) } + +func (t *testMaster) TestJoinMember(c *check.C) { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + + // create a new cluster + cfg1 := NewConfig() + c.Assert(cfg1.Parse([]string{"-config=./dm-master.toml"}), check.IsNil) + cfg1.Name = "dm-master-1" + cfg1.DataDir = c.MkDir() + cfg1.MasterAddr = tempurl.Alloc()[len("http://"):] + cfg1.PeerUrls = tempurl.Alloc() + cfg1.AdvertisePeerUrls = cfg1.PeerUrls + cfg1.InitialCluster = fmt.Sprintf("%s=%s", cfg1.Name, cfg1.AdvertisePeerUrls) + + s1 := NewServer(cfg1) + c.Assert(s1.Start(ctx), check.IsNil) + defer s1.Close() + + // join to an existing cluster + cfg2 := NewConfig() + c.Assert(cfg2.Parse([]string{"-config=./dm-master.toml"}), check.IsNil) + cfg2.Name = "dm-master-2" + cfg2.DataDir = c.MkDir() + cfg2.MasterAddr = tempurl.Alloc()[len("http://"):] + cfg2.PeerUrls = tempurl.Alloc() + cfg2.AdvertisePeerUrls = cfg2.PeerUrls + cfg2.Join = cfg1.AdvertisePeerUrls // join to an existing cluster + + s2 := NewServer(cfg2) + c.Assert(s2.Start(ctx), check.IsNil) + defer s2.Close() + + client, err := clientv3.New(clientv3.Config{ + Endpoints: strings.Split(cfg1.AdvertisePeerUrls, ","), + DialTimeout: etcdutil.DefaultDialTimeout, + }) + c.Assert(err, check.IsNil) + defer client.Close() + + // verify membersm + listResp, err := etcdutil.ListMembers(client) + c.Assert(err, check.IsNil) + c.Assert(listResp.Members, check.HasLen, 2) + names := make(map[string]struct{}, len(listResp.Members)) + for _, m := range listResp.Members { + names[m.Name] = struct{}{} + } + _, ok := names[cfg1.Name] + c.Assert(ok, check.IsTrue) + _, ok = names[cfg2.Name] + c.Assert(ok, check.IsTrue) + + cancel() +} From dcda7b23a1dad59bbdc8dd3d69e12d9cb89cc623 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Wed, 13 Nov 2019 15:06:56 +0800 Subject: [PATCH 25/27] master: address comments --- dm/master/dm-master.toml | 8 +------- dm/master/etcd.go | 39 +++++++++++++++++++++++++++++---------- dm/master/server.go | 11 ++++++----- 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/dm/master/dm-master.toml b/dm/master/dm-master.toml index dcae21d803..2e51295207 100644 --- a/dm/master/dm-master.toml +++ b/dm/master/dm-master.toml @@ -22,13 +22,7 @@ advertise-peer-urls = "http://127.0.0.1:8291" # initial cluster configuration for bootstrapping, e,g. dm-master=http://127.0.0.1:8291 initial-cluster = "dm-master=http://127.0.0.1:8291" -# Initial cluster state ("new" or "existing"). -# Set to "new" for all members present during initial static or DNS bootstrapping. -# If this option is set to "existing", DM-master will attempt to join the existing cluster. -# If the wrong value is set, DM-master will attempt to start but fail safely. -initial-cluster-state = "new" - -# Join to an existing pd cluster, a string of existing cluster's endpoints. +# Join to an existing DM-master cluster, a string of existing cluster's endpoints. join = "" # rpc configuration diff --git a/dm/master/etcd.go b/dm/master/etcd.go index fe5e4f0e73..6571db8d91 100644 --- a/dm/master/etcd.go +++ b/dm/master/etcd.go @@ -71,6 +71,22 @@ func startEtcd(masterCfg *Config, // prepareJoinEtcd prepares config needed to join an existing cluster. // learn from https://github.com/pingcap/pd/blob/37efcb05f397f26c70cda8dd44acaa3061c92159/server/join/join.go#L44. +// +// when setting `initial-cluster` explicitly to bootstrap a new cluster: +// - if local persistent data exist, just restart the previous cluster (in fact, it's not bootstrapping). +// - if local persistent data not exist, just bootstrap the cluster. +// +// when setting `join` to join an existing cluster (without `initial-cluster` set): +// - if local persistent data exists (in fact, it's not join): +// - just restart if `member` already exists (already joined before) +// - read `initial-cluster` back from local persistent data to restart (just like bootstrapping) +// - if local persistent data not exist: +// 1. fetch member list from the cluster to check if we can join now. +// 2. call `member add` to add the member info into the cluster. +// 3. generate config for join (`initial-cluster` and `initial-cluster-state`). +// 4. save `initial-cluster` in local persistent data for later restarting. +// +// NOTE: A member can't join to another cluster after it has joined a previous one. func prepareJoinEtcd(cfg *Config) error { // no need to join if cfg.Join == "" { @@ -82,25 +98,25 @@ func prepareJoinEtcd(cfg *Config) error { return terror.ErrMasterJoinEmbedEtcdFail.Generate(fmt.Sprintf("join self %s is forbidden", cfg.Join)) } + // restart with previous data, no `InitialCluster` need to set + if isDataExist(filepath.Join(cfg.DataDir, "member")) { + cfg.InitialCluster = "" + cfg.InitialClusterState = embed.ClusterStateFlagExisting + return nil + } + // join with persistent data joinFP := filepath.Join(cfg.DataDir, "join") - if _, err := os.Stat(joinFP); !os.IsNotExist(err) { - s, err := ioutil.ReadFile(joinFP) - if err != nil { + if s, err := ioutil.ReadFile(joinFP); err != nil { + if !os.IsNotExist(err) { return terror.ErrMasterJoinEmbedEtcdFail.Delegate(err, "read persistent join data") } + } else { cfg.InitialCluster = strings.TrimSpace(string(s)) cfg.InitialClusterState = embed.ClusterStateFlagExisting return nil } - // restart with previous data, no `InitialCluster` need to set - if isDataExist(filepath.Join(cfg.DataDir, "member")) { - cfg.InitialCluster = "" - cfg.InitialClusterState = embed.ClusterStateFlagExisting - return nil - } - // if without previous data, we need a client to contact with the existing cluster. client, err := clientv3.New(clientv3.Config{ Endpoints: strings.Split(cfg.Join, ","), @@ -139,6 +155,9 @@ func prepareJoinEtcd(cfg *Config) error { for _, m := range addResp.Members { name := m.Name if m.ID == addResp.Member.ID { + // the member only called `member add`, + // but has not started the process to complete the join should have an empty name. + // so, we use the `name` in config instead. name = cfg.Name } if name == "" { diff --git a/dm/master/server.go b/dm/master/server.go index b2631ef061..3cc367a9df 100644 --- a/dm/master/server.go +++ b/dm/master/server.go @@ -95,11 +95,6 @@ func NewServer(cfg *Config) *Server { // Start starts to serving func (s *Server) Start(ctx context.Context) (err error) { - err = prepareJoinEtcd(s.cfg) - if err != nil { - return - } - // create clients to DM-workers for _, workerAddr := range s.cfg.DeployMap { s.workerClients[workerAddr], err = workerrpc.NewGRPCClient(workerAddr) @@ -131,6 +126,12 @@ func (s *Server) Start(ctx context.Context) (err error) { // gRPC API server gRPCSvr := func(gs *grpc.Server) { pb.RegisterMasterServer(gs, s) } + // prepare config to join an existing cluster + err = prepareJoinEtcd(s.cfg) + if err != nil { + return + } + // start embed etcd server, gRPC API server and HTTP (API, status and debug) server. s.etcd, err = startEtcd(s.cfg, gRPCSvr, userHandles) if err != nil { From c9a413fe473ee766934ff1a2f045c49e7d63ebf0 Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Wed, 13 Nov 2019 20:31:07 +0800 Subject: [PATCH 26/27] master: address comments --- dm/master/etcd.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dm/master/etcd.go b/dm/master/etcd.go index 6571db8d91..a7983560c6 100644 --- a/dm/master/etcd.go +++ b/dm/master/etcd.go @@ -24,9 +24,11 @@ import ( "go.etcd.io/etcd/clientv3" "go.etcd.io/etcd/embed" + "go.uber.org/zap" "google.golang.org/grpc" "github.com/pingcap/dm/pkg/etcdutil" + "github.com/pingcap/dm/pkg/log" "github.com/pingcap/dm/pkg/terror" ) @@ -74,7 +76,7 @@ func startEtcd(masterCfg *Config, // // when setting `initial-cluster` explicitly to bootstrap a new cluster: // - if local persistent data exist, just restart the previous cluster (in fact, it's not bootstrapping). -// - if local persistent data not exist, just bootstrap the cluster. +// - if local persistent data not exist, just bootstrap the cluster as a new cluster. // // when setting `join` to join an existing cluster (without `initial-cluster` set): // - if local persistent data exists (in fact, it's not join): @@ -114,6 +116,7 @@ func prepareJoinEtcd(cfg *Config) error { } else { cfg.InitialCluster = strings.TrimSpace(string(s)) cfg.InitialClusterState = embed.ClusterStateFlagExisting + log.L().Info("using persistent join data", zap.String("file", joinFP), zap.String("data", cfg.InitialCluster)) return nil } From b0f53328e849e95757c15fe7f599e7115b04258e Mon Sep 17 00:00:00 2001 From: csuzhangxc Date: Thu, 14 Nov 2019 15:35:40 +0800 Subject: [PATCH 27/27] master: address comments --- dm/master/config.go | 4 ++-- dm/master/etcd.go | 19 ++++++++++++++----- dm/master/etcd_test.go | 10 +++++----- dm/master/server_test.go | 2 +- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/dm/master/config.go b/dm/master/config.go index 8bfb1d8bd6..4c1e510a54 100644 --- a/dm/master/config.go +++ b/dm/master/config.go @@ -66,7 +66,7 @@ func NewConfig() *Config { fs.StringVar(&cfg.InitialCluster, "initial-cluster", "", fmt.Sprintf("initial cluster configuration for bootstrapping, e,g. dm-master=%s", defaultPeerUrls)) fs.StringVar(&cfg.PeerUrls, "peer-urls", defaultPeerUrls, "URLs for peer traffic") fs.StringVar(&cfg.AdvertisePeerUrls, "advertise-peer-urls", "", `advertise URLs for peer traffic (default "${peer-urls}")`) - fs.StringVar(&cfg.Join, "join", "", `join to an existing cluster (usage: cluster's "${advertise-client-urls}"`) + fs.StringVar(&cfg.Join, "join", "", `join to an existing cluster (usage: cluster's "${master-addr}" list, e,g. "127.0.0.1:8261,127.0.0.1:18261"`) return cfg } @@ -116,7 +116,7 @@ type Config struct { AdvertisePeerUrls string `toml:"advertise-peer-urls" json:"advertise-peer-urls"` InitialCluster string `toml:"initial-cluster" json:"initial-cluster"` InitialClusterState string `toml:"initial-cluster-state" json:"initial-cluster-state"` - Join string `toml:"join" json:"join"` + Join string `toml:"join" json:"join"` // cluster's client address (endpoints), not peer address printVersion bool printSampleConfig bool diff --git a/dm/master/etcd.go b/dm/master/etcd.go index a7983560c6..671b6cdb57 100644 --- a/dm/master/etcd.go +++ b/dm/master/etcd.go @@ -96,8 +96,11 @@ func prepareJoinEtcd(cfg *Config) error { } // try to join self, invalid - if cfg.Join == cfg.AdvertisePeerUrls { - return terror.ErrMasterJoinEmbedEtcdFail.Generate(fmt.Sprintf("join self %s is forbidden", cfg.Join)) + clientURLs := strings.Split(cfg.Join, ",") + for _, clientURL := range clientURLs { + if clientURL == cfg.MasterAddr { + return terror.ErrMasterJoinEmbedEtcdFail.Generate(fmt.Sprintf("join self %s is forbidden", cfg.Join)) + } } // restart with previous data, no `InitialCluster` need to set @@ -138,8 +141,11 @@ func prepareJoinEtcd(cfg *Config) error { // check members for _, m := range listResp.Members { - if m.Name == "" { - return terror.ErrMasterJoinEmbedEtcdFail.Generate("there is a member that has not joined successfully") + if m.Name == "" { // the previous existing member without name (not complete the join operation) + // we can't generate `initial-cluster` correctly with empty member name, + // and if added a member but not started it to complete the join, + // the later join operation may encounter `etcdserver: re-configuration failed due to not enough started members`. + return terror.ErrMasterJoinEmbedEtcdFail.Generate("there is a member that has not joined successfully, continue the join or remove it") } if m.Name == cfg.Name { // a failed DM-master re-joins the previous cluster. @@ -164,7 +170,10 @@ func prepareJoinEtcd(cfg *Config) error { name = cfg.Name } if name == "" { - return terror.ErrMasterJoinEmbedEtcdFail.Generate("there is a member that has not joined successfully") + // this should be checked in the previous `member list` operation if having only one member is join. + // if multi join operations exist, the behavior may be unexpected. + // check again here only to decrease the unexpectedness. + return terror.ErrMasterJoinEmbedEtcdFail.Generate("there is a member that has not joined successfully, continue the join or remove it") } for _, url := range m.PeerURLs { ms = append(ms, fmt.Sprintf("%s=%s", name, url)) diff --git a/dm/master/etcd_test.go b/dm/master/etcd_test.go index a1e3bdf8c0..dc5f962649 100644 --- a/dm/master/etcd_test.go +++ b/dm/master/etcd_test.go @@ -51,7 +51,7 @@ func (t *testEtcdSuite) TestPrepareJoinEtcd(c *check.C) { cfgAfter := t.cloneConfig(cfgBefore) // after `prepareJoinEtcd applied - joinCluster := cfgCluster.PeerUrls + joinCluster := cfgCluster.MasterAddr joinFP := filepath.Join(cfgBefore.DataDir, "join") memberDP := filepath.Join(cfgBefore.DataDir, "member") @@ -60,7 +60,7 @@ func (t *testEtcdSuite) TestPrepareJoinEtcd(c *check.C) { c.Assert(cfgAfter, check.DeepEquals, cfgBefore) // try to join self - cfgAfter.Join = cfgAfter.AdvertisePeerUrls + cfgAfter.Join = cfgAfter.MasterAddr err := prepareJoinEtcd(cfgAfter) c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue) c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: join self.*is forbidden.*") @@ -142,7 +142,7 @@ func (t *testEtcdSuite) TestPrepareJoinEtcd(c *check.C) { cfgAfter2.AdvertisePeerUrls = cfgAfter2.PeerUrls err = prepareJoinEtcd(cfgAfter2) c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue) - c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: there is a member that has not joined successfully.*") + c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: there is a member that has not joined successfully, continue the join or remove it.*") // start the joining etcd e2, err := startEtcd(cfgAfter, nil, nil) @@ -150,7 +150,7 @@ func (t *testEtcdSuite) TestPrepareJoinEtcd(c *check.C) { defer e2.Close() // try join again - for i := 0; i < 10; i++ { + for i := 0; i < 20; i++ { err = prepareJoinEtcd(cfgAfter2) if err == nil { break @@ -158,7 +158,7 @@ func (t *testEtcdSuite) TestPrepareJoinEtcd(c *check.C) { // for `etcdserver: unhealthy cluster`, try again later c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue) c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: add member.*: etcdserver: unhealthy cluster.*") - time.Sleep(time.Second) + time.Sleep(500 * time.Millisecond) } c.Assert(err, check.IsNil) } diff --git a/dm/master/server_test.go b/dm/master/server_test.go index 0f1e6ec3fb..72a40b82af 100644 --- a/dm/master/server_test.go +++ b/dm/master/server_test.go @@ -1555,7 +1555,7 @@ func (t *testMaster) TestJoinMember(c *check.C) { cfg2.MasterAddr = tempurl.Alloc()[len("http://"):] cfg2.PeerUrls = tempurl.Alloc() cfg2.AdvertisePeerUrls = cfg2.PeerUrls - cfg2.Join = cfg1.AdvertisePeerUrls // join to an existing cluster + cfg2.Join = cfg1.MasterAddr // join to an existing cluster s2 := NewServer(cfg2) c.Assert(s2.Start(ctx), check.IsNil)