Skip to content

Commit

Permalink
cluster: check support check before scale-out the instance (#1659)
Browse files Browse the repository at this point in the history
  • Loading branch information
srstack authored Dec 30, 2021
1 parent 13b33c0 commit 45c5017
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 39 deletions.
20 changes: 16 additions & 4 deletions components/cluster/command/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,22 +28,34 @@ func newCheckCmd() *cobra.Command {
IdentityFile: path.Join(utils.UserHome(), ".ssh", "id_rsa"),
}
cmd := &cobra.Command{
Use: "check <topology.yml | cluster-name>",
Use: "check <topology.yml | cluster-name> [scale-out.yml]",
Short: "Perform preflight checks for the cluster.",
Long: `Perform preflight checks for the cluster. By default, it checks deploy servers
before a cluster is deployed, the input is the topology.yaml for the cluster.
If '--cluster' is set, it will perform checks for an existing cluster, the input
is the cluster name. Some checks are ignore in this mode, such as port and dir
conflict checks with other clusters`,
conflict checks with other clusters
If you want to check the scale-out topology, please use execute the following command
' check <cluster-name> <scale-out.yml> --cluster '
it will the new instances `,
RunE: func(cmd *cobra.Command, args []string) error {
if len(args) != 1 {
if len(args) != 1 && len(args) != 2 {
return cmd.Help()
}
scaleOutTopo := ""

if opt.ExistCluster {
clusterReport.ID = scrubClusterName(args[0])
}
return cm.CheckCluster(args[0], opt, gOpt)

if len(args) == 2 {
if !opt.ExistCluster {
return cmd.Help()
}
scaleOutTopo = args[1]
}

return cm.CheckCluster(args[0], scaleOutTopo, opt, gOpt)
},
}

Expand Down
127 changes: 100 additions & 27 deletions pkg/cluster/manager/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,14 @@ type CheckOptions struct {
}

// CheckCluster check cluster before deploying or upgrading
func (m *Manager) CheckCluster(clusterOrTopoName string, opt CheckOptions, gOpt operator.Options) error {
func (m *Manager) CheckCluster(clusterOrTopoName, scaleoutTopo string, opt CheckOptions, gOpt operator.Options) error {
var topo spec.Specification
ctx := ctxt.New(
context.Background(),
gOpt.Concurrency,
m.logger,
)
var currTopo *spec.Specification

if opt.ExistCluster { // check for existing cluster
clusterName := clusterOrTopoName
Expand All @@ -69,10 +70,28 @@ func (m *Manager) CheckCluster(clusterOrTopoName string, opt CheckOptions, gOpt
if err != nil {
return err
}
opt.User = metadata.User
opt.IdentityFile = m.specManager.Path(clusterName, "ssh", "id_rsa")

topo = *metadata.Topology
if scaleoutTopo != "" {
currTopo = metadata.Topology
// complete global configuration
topo.GlobalOptions = currTopo.GlobalOptions
topo.MonitoredOptions = currTopo.MonitoredOptions
topo.ServerConfigs = currTopo.ServerConfigs

if err := spec.ParseTopologyYaml(scaleoutTopo, &topo); err != nil {
return err
}
spec.ExpandRelativeDir(&topo)

// checkConflict after fillHostArch
// scaleOutTopo also is not exists instacne
opt.ExistCluster = false
} else {
opt.IdentityFile = m.specManager.Path(clusterName, "ssh", "id_rsa")
topo = *metadata.Topology
opt.User = metadata.User
}

topo.AdjustByVersion(metadata.Version)
} else { // check before cluster is deployed
topoFileName := clusterOrTopoName
Expand All @@ -82,15 +101,7 @@ func (m *Manager) CheckCluster(clusterOrTopoName string, opt CheckOptions, gOpt
}
spec.ExpandRelativeDir(&topo)

clusterList, err := m.specManager.GetAllClusters()
if err != nil {
return err
}
// use a dummy cluster name, the real cluster name is set during deploy
if err := spec.CheckClusterPortConflict(clusterList, "nonexist-dummy-tidb-cluster", &topo); err != nil {
return err
}
if err := spec.CheckClusterDirConflict(clusterList, "nonexist-dummy-tidb-cluster", &topo); err != nil {
if err := checkConflict(m, "nonexist-dummy-tidb-cluster", &topo); err != nil {
return err
}
}
Expand All @@ -115,6 +126,17 @@ func (m *Manager) CheckCluster(clusterOrTopoName string, opt CheckOptions, gOpt
return err
}

// Abort scale out operation if the merged topology is invalid
if currTopo != nil && scaleoutTopo != "" {
mergedTopo := currTopo.MergeTopo(&topo)
if err := mergedTopo.Validate(); err != nil {
return err
}
if err := checkConflict(m, clusterOrTopoName, mergedTopo); err != nil {
return err
}
}

if err := checkSystemInfo(ctx, sshConnProps, sshProxyProps, &topo, &gOpt, &opt); err != nil {
return err
}
Expand Down Expand Up @@ -186,6 +208,38 @@ func checkSystemInfo(
opt.Opr,
)
}

if !opt.ExistCluster {
t1 = t1.
CheckSys(
inst.GetHost(),
inst.DeployDir(),
task.ChecktypeIsExist,
topo,
opt.Opr,
).
CheckSys(
inst.GetHost(),
inst.DataDir(),
task.ChecktypeIsExist,
topo,
opt.Opr,
).
CheckSys(
inst.GetHost(),
inst.LogDir(),
task.ChecktypeIsExist,
topo,
opt.Opr,
).
CheckSys(
inst.GetHost(),
fmt.Sprintf("/etc/systemd/system/%s-%d.service", inst.ComponentName(), inst.GetPort()),
task.ChecktypeIsExist,
topo,
opt.Opr,
)
}
// if the data dir set in topology is relative, and the home dir of deploy user
// and the user run the check command is on different partitions, the disk detection
// may be using incorrect partition for validations.
Expand All @@ -199,6 +253,7 @@ func checkSystemInfo(
topo,
opt.Opr,
)

if opt.ExistCluster {
t1 = t1.CheckSys(
inst.GetHost(),
Expand Down Expand Up @@ -270,20 +325,6 @@ func checkSystemInfo(
topo,
opt.Opr,
).
// check for listening port
Shell(
inst.GetHost(),
"ss -lnt",
"",
false,
).
CheckSys(
inst.GetHost(),
"",
task.CheckTypePort,
topo,
opt.Opr,
).
// check for system limits
Shell(
inst.GetHost(),
Expand Down Expand Up @@ -328,6 +369,24 @@ func checkSystemInfo(
topo,
opt.Opr,
)

if !opt.ExistCluster {
t1 = t1.
// check for listening port
Shell(
inst.GetHost(),
"ss -lnt",
"",
false,
).
CheckSys(
inst.GetHost(),
"",
task.CheckTypePort,
topo,
opt.Opr,
)
}
}

checkSysTasks = append(
Expand Down Expand Up @@ -596,3 +655,17 @@ func (m *Manager) checkRegionsInfo(clusterName string, topo *spec.Specification,
}
return nil
}

// checkConflict checks cluster conflict
func checkConflict(m *Manager, clusterName string, topo spec.Topology) error {
clusterList, err := m.specManager.GetAllClusters()
if err != nil {
return err
}
// use a dummy cluster name, the real cluster name is set during deploy
if err := spec.CheckClusterPortConflict(clusterList, clusterName, topo); err != nil {
return err
}
err = spec.CheckClusterDirConflict(clusterList, clusterName, topo)
return err
}
9 changes: 1 addition & 8 deletions pkg/cluster/manager/scale_out.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,7 @@ func (m *Manager) ScaleOut(
}
}

clusterList, err := m.specManager.GetAllClusters()
if err != nil {
return err
}
if err := spec.CheckClusterPortConflict(clusterList, name, mergedTopo); err != nil {
return err
}
if err := spec.CheckClusterDirConflict(clusterList, name, mergedTopo); err != nil {
if err := checkConflict(m, name, mergedTopo); err != nil {
return err
}
}
Expand Down
27 changes: 27 additions & 0 deletions pkg/cluster/operation/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ var (
CheckNameFio = "fio"
CheckNameTHP = "thp"
CheckNameDirPermission = "permission"
CheckNameDirExist = "exist"
)

// CheckResult is the result of a check
Expand Down Expand Up @@ -866,3 +867,29 @@ func CheckDirPermission(ctx context.Context, e ctxt.Executor, user, path string)

return results
}

// CheckDirIsExist check if the directory exists
func CheckDirIsExist(ctx context.Context, e ctxt.Executor, path string) []*CheckResult {
var results []*CheckResult

if path == "" {
return results
}

req, _, _ := e.Execute(ctx,
fmt.Sprintf(
"[ -e %s ] && echo 1",
path,
),
false)

if strings.ReplaceAll(string(req), "\n", "") == "1" {
results = append(results, &CheckResult{
Name: CheckNameDirExist,
Err: fmt.Errorf("%s already exists", path),
Msg: fmt.Sprintf("%s already exists", path),
})
}

return results
}
8 changes: 8 additions & 0 deletions pkg/cluster/task/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ var (
CheckTypePartitions = "partitions"
CheckTypeFIO = "fio"
CheckTypePermission = "permission"
ChecktypeIsExist = "exist"
)

// place the check utilities are stored
Expand Down Expand Up @@ -147,6 +148,13 @@ func (c *CheckSys) Execute(ctx context.Context) error {
return ErrNoExecutor
}
storeResults(ctx, c.host, operator.CheckDirPermission(ctx, e, c.topo.GlobalOptions.User, c.checkDir))
case ChecktypeIsExist:
e, ok := ctxt.GetInner(ctx).GetExecutor(c.host)
if !ok {
return ErrNoExecutor
}
// check partition mount options for data_dir
storeResults(ctx, c.host, operator.CheckDirIsExist(ctx, e, c.checkDir))
}

return nil
Expand Down

0 comments on commit 45c5017

Please sign in to comment.