Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cluster: check support check before scale-out the instance #1659

Merged
merged 17 commits into from
Dec 30, 2021
20 changes: 16 additions & 4 deletions components/cluster/command/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,22 +28,34 @@ func newCheckCmd() *cobra.Command {
IdentityFile: path.Join(utils.UserHome(), ".ssh", "id_rsa"),
}
cmd := &cobra.Command{
Use: "check <topology.yml | cluster-name>",
Use: "check <topology.yml | cluster-name> [scale-out.yml]",
Short: "Perform preflight checks for the cluster.",
Long: `Perform preflight checks for the cluster. By default, it checks deploy servers
before a cluster is deployed, the input is the topology.yaml for the cluster.
If '--cluster' is set, it will perform checks for an existing cluster, the input
is the cluster name. Some checks are ignore in this mode, such as port and dir
conflict checks with other clusters`,
conflict checks with other clusters
If you want to check the scale-out topology, please use execute the following command
' check <cluster-name> <scale-out.yml> --cluster '
it will the new instances `,
RunE: func(cmd *cobra.Command, args []string) error {
if len(args) != 1 {
if len(args) != 1 && len(args) != 2 {
return cmd.Help()
}
scaleOutTopo := ""

if opt.ExistCluster {
clusterReport.ID = scrubClusterName(args[0])
}
return cm.CheckCluster(args[0], opt, gOpt)

if len(args) == 2 {
if !opt.ExistCluster {
return cmd.Help()
}
scaleOutTopo = args[1]
}

return cm.CheckCluster(args[0], scaleOutTopo, opt, gOpt)
},
}

Expand Down
127 changes: 100 additions & 27 deletions pkg/cluster/manager/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,14 @@ type CheckOptions struct {
}

// CheckCluster check cluster before deploying or upgrading
func (m *Manager) CheckCluster(clusterOrTopoName string, opt CheckOptions, gOpt operator.Options) error {
func (m *Manager) CheckCluster(clusterOrTopoName, scaleoutTopo string, opt CheckOptions, gOpt operator.Options) error {
var topo spec.Specification
ctx := ctxt.New(
context.Background(),
gOpt.Concurrency,
m.logger,
)
var currTopo *spec.Specification

if opt.ExistCluster { // check for existing cluster
clusterName := clusterOrTopoName
Expand All @@ -69,10 +70,28 @@ func (m *Manager) CheckCluster(clusterOrTopoName string, opt CheckOptions, gOpt
if err != nil {
return err
}
opt.User = metadata.User
opt.IdentityFile = m.specManager.Path(clusterName, "ssh", "id_rsa")

topo = *metadata.Topology
if scaleoutTopo != "" {
currTopo = metadata.Topology
// complete global configuration
topo.GlobalOptions = currTopo.GlobalOptions
topo.MonitoredOptions = currTopo.MonitoredOptions
topo.ServerConfigs = currTopo.ServerConfigs

if err := spec.ParseTopologyYaml(scaleoutTopo, &topo); err != nil {
return err
}
spec.ExpandRelativeDir(&topo)

// checkConflict after fillHostArch
// scaleOutTopo also is not exists instacne
opt.ExistCluster = false
} else {
opt.IdentityFile = m.specManager.Path(clusterName, "ssh", "id_rsa")
topo = *metadata.Topology
opt.User = metadata.User
}

topo.AdjustByVersion(metadata.Version)
} else { // check before cluster is deployed
topoFileName := clusterOrTopoName
Expand All @@ -82,15 +101,7 @@ func (m *Manager) CheckCluster(clusterOrTopoName string, opt CheckOptions, gOpt
}
spec.ExpandRelativeDir(&topo)

clusterList, err := m.specManager.GetAllClusters()
if err != nil {
return err
}
// use a dummy cluster name, the real cluster name is set during deploy
if err := spec.CheckClusterPortConflict(clusterList, "nonexist-dummy-tidb-cluster", &topo); err != nil {
return err
}
if err := spec.CheckClusterDirConflict(clusterList, "nonexist-dummy-tidb-cluster", &topo); err != nil {
if err := checkConflict(m, "nonexist-dummy-tidb-cluster", &topo); err != nil {
return err
}
}
Expand All @@ -115,6 +126,17 @@ func (m *Manager) CheckCluster(clusterOrTopoName string, opt CheckOptions, gOpt
return err
}

// Abort scale out operation if the merged topology is invalid
if currTopo != nil && scaleoutTopo != "" {
mergedTopo := currTopo.MergeTopo(&topo)
if err := mergedTopo.Validate(); err != nil {
return err
}
if err := checkConflict(m, clusterOrTopoName, mergedTopo); err != nil {
return err
}
}

if err := checkSystemInfo(ctx, sshConnProps, sshProxyProps, &topo, &gOpt, &opt); err != nil {
return err
}
Expand Down Expand Up @@ -186,6 +208,38 @@ func checkSystemInfo(
opt.Opr,
)
}

if !opt.ExistCluster {
t1 = t1.
CheckSys(
inst.GetHost(),
inst.DeployDir(),
task.ChecktypeIsExist,
topo,
opt.Opr,
).
CheckSys(
inst.GetHost(),
inst.DataDir(),
task.ChecktypeIsExist,
topo,
opt.Opr,
).
CheckSys(
inst.GetHost(),
inst.LogDir(),
task.ChecktypeIsExist,
topo,
opt.Opr,
).
CheckSys(
inst.GetHost(),
fmt.Sprintf("/etc/systemd/system/%s-%d.service", inst.ComponentName(), inst.GetPort()),
task.ChecktypeIsExist,
topo,
opt.Opr,
)
}
// if the data dir set in topology is relative, and the home dir of deploy user
// and the user run the check command is on different partitions, the disk detection
// may be using incorrect partition for validations.
Expand All @@ -199,6 +253,7 @@ func checkSystemInfo(
topo,
opt.Opr,
)

if opt.ExistCluster {
t1 = t1.CheckSys(
inst.GetHost(),
Expand Down Expand Up @@ -270,20 +325,6 @@ func checkSystemInfo(
topo,
opt.Opr,
).
// check for listening port
Shell(
inst.GetHost(),
"ss -lnt",
"",
false,
).
CheckSys(
inst.GetHost(),
"",
task.CheckTypePort,
topo,
opt.Opr,
).
// check for system limits
Shell(
inst.GetHost(),
Expand Down Expand Up @@ -328,6 +369,24 @@ func checkSystemInfo(
topo,
opt.Opr,
)

if !opt.ExistCluster {
t1 = t1.
// check for listening port
Shell(
inst.GetHost(),
"ss -lnt",
"",
false,
).
CheckSys(
inst.GetHost(),
"",
task.CheckTypePort,
topo,
opt.Opr,
)
}
}

checkSysTasks = append(
Expand Down Expand Up @@ -596,3 +655,17 @@ func (m *Manager) checkRegionsInfo(clusterName string, topo *spec.Specification,
}
return nil
}

// checkConflict checks cluster conflict
func checkConflict(m *Manager, clusterName string, topo spec.Topology) error {
clusterList, err := m.specManager.GetAllClusters()
if err != nil {
return err
}
// use a dummy cluster name, the real cluster name is set during deploy
if err := spec.CheckClusterPortConflict(clusterList, clusterName, topo); err != nil {
return err
}
err = spec.CheckClusterDirConflict(clusterList, clusterName, topo)
return err
}
9 changes: 1 addition & 8 deletions pkg/cluster/manager/scale_out.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,7 @@ func (m *Manager) ScaleOut(
}
}

clusterList, err := m.specManager.GetAllClusters()
if err != nil {
return err
}
if err := spec.CheckClusterPortConflict(clusterList, name, mergedTopo); err != nil {
return err
}
if err := spec.CheckClusterDirConflict(clusterList, name, mergedTopo); err != nil {
if err := checkConflict(m, name, mergedTopo); err != nil {
return err
}
}
Expand Down
27 changes: 27 additions & 0 deletions pkg/cluster/operation/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ var (
CheckNameFio = "fio"
CheckNameTHP = "thp"
CheckNameDirPermission = "permission"
CheckNameDirExist = "exist"
)

// CheckResult is the result of a check
Expand Down Expand Up @@ -866,3 +867,29 @@ func CheckDirPermission(ctx context.Context, e ctxt.Executor, user, path string)

return results
}

// CheckDirIsExist check if the directory exists
func CheckDirIsExist(ctx context.Context, e ctxt.Executor, path string) []*CheckResult {
var results []*CheckResult

if path == "" {
return results
}

req, _, _ := e.Execute(ctx,
fmt.Sprintf(
"[ -e %s ] && echo 1",
path,
),
false)

if strings.ReplaceAll(string(req), "\n", "") == "1" {
results = append(results, &CheckResult{
Name: CheckNameDirExist,
Err: fmt.Errorf("%s already exists", path),
Msg: fmt.Sprintf("%s already exists", path),
})
}

return results
}
8 changes: 8 additions & 0 deletions pkg/cluster/task/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ var (
CheckTypePartitions = "partitions"
CheckTypeFIO = "fio"
CheckTypePermission = "permission"
ChecktypeIsExist = "exist"
)

// place the check utilities are stored
Expand Down Expand Up @@ -147,6 +148,13 @@ func (c *CheckSys) Execute(ctx context.Context) error {
return ErrNoExecutor
}
storeResults(ctx, c.host, operator.CheckDirPermission(ctx, e, c.topo.GlobalOptions.User, c.checkDir))
case ChecktypeIsExist:
e, ok := ctxt.GetInner(ctx).GetExecutor(c.host)
if !ok {
return ErrNoExecutor
}
// check partition mount options for data_dir
storeResults(ctx, c.host, operator.CheckDirIsExist(ctx, e, c.checkDir))
}

return nil
Expand Down