diff --git a/components/cluster/command/clean.go b/components/cluster/command/clean.go index a93e0e96af..560a47d99f 100644 --- a/components/cluster/command/clean.go +++ b/components/cluster/command/clean.go @@ -50,7 +50,7 @@ You can retain some nodes and roles data when cleanup the cluster, eg: return cmd.Help() } - return manager.CleanCluster(clusterName, gOpt, cleanOpt, skipConfirm) + return cm.CleanCluster(clusterName, gOpt, cleanOpt, skipConfirm) }, } diff --git a/components/cluster/command/deploy.go b/components/cluster/command/deploy.go index c7fe46d199..0b1339356c 100644 --- a/components/cluster/command/deploy.go +++ b/components/cluster/command/deploy.go @@ -19,8 +19,8 @@ import ( "path" "github.com/pingcap/tiup/pkg/cliutil" - "github.com/pingcap/tiup/pkg/cluster" "github.com/pingcap/tiup/pkg/cluster/executor" + "github.com/pingcap/tiup/pkg/cluster/manager" operator "github.com/pingcap/tiup/pkg/cluster/operation" "github.com/pingcap/tiup/pkg/cluster/report" "github.com/pingcap/tiup/pkg/cluster/spec" @@ -45,7 +45,7 @@ var ( ) func newDeploy() *cobra.Command { - opt := cluster.DeployOptions{ + opt := manager.DeployOptions{ IdentityFile: path.Join(utils.UserHome(), ".ssh", "id_rsa"), } cmd := &cobra.Command{ @@ -77,17 +77,7 @@ func newDeploy() *cobra.Command { teleTopology = string(data) } - return manager.Deploy( - clusterName, - version, - topoFile, - opt, - postDeployHook, - skipConfirm, - gOpt.OptTimeout, - gOpt.SSHTimeout, - gOpt.SSHType, - ) + return cm.Deploy(clusterName, version, topoFile, opt, postDeployHook, skipConfirm, gOpt) }, } diff --git a/components/cluster/command/destroy.go b/components/cluster/command/destroy.go index 306c06b1a2..4c81c36027 100644 --- a/components/cluster/command/destroy.go +++ b/components/cluster/command/destroy.go @@ -50,7 +50,7 @@ You can retain some nodes and roles data when destroy cluster, eg: } } - return manager.DestroyCluster(clusterName, gOpt, destoyOpt, skipConfirm) + return cm.DestroyCluster(clusterName, gOpt, destoyOpt, skipConfirm) }, } diff --git a/components/cluster/command/disable.go b/components/cluster/command/disable.go index b1ac53b415..0386c21aae 100644 --- a/components/cluster/command/disable.go +++ b/components/cluster/command/disable.go @@ -33,7 +33,7 @@ func newDisableCmd() *cobra.Command { clusterName := args[0] teleCommand = append(teleCommand, scrubClusterName(clusterName)) - return manager.EnableCluster(clusterName, gOpt, false) + return cm.EnableCluster(clusterName, gOpt, false) }, } diff --git a/components/cluster/command/display.go b/components/cluster/command/display.go index dc807c3cb4..4710cdb2a3 100644 --- a/components/cluster/command/display.go +++ b/components/cluster/command/display.go @@ -67,7 +67,7 @@ func newDisplayCmd() *cobra.Command { return displayDashboardInfo(clusterName, tlsCfg) } - return manager.Display(clusterName, gOpt) + return cm.Display(clusterName, gOpt) }, } diff --git a/components/cluster/command/edit_config.go b/components/cluster/command/edit_config.go index 18ed3a5261..94a6529e46 100644 --- a/components/cluster/command/edit_config.go +++ b/components/cluster/command/edit_config.go @@ -29,7 +29,7 @@ func newEditConfigCmd() *cobra.Command { clusterName := args[0] teleCommand = append(teleCommand, scrubClusterName(clusterName)) - return manager.EditConfig(clusterName, skipConfirm) + return cm.EditConfig(clusterName, skipConfirm) }, } diff --git a/components/cluster/command/enable.go b/components/cluster/command/enable.go index b78dc1194c..311ec7735d 100644 --- 
a/components/cluster/command/enable.go +++ b/components/cluster/command/enable.go @@ -33,7 +33,7 @@ func newEnableCmd() *cobra.Command { clusterName := args[0] teleCommand = append(teleCommand, scrubClusterName(clusterName)) - return manager.EnableCluster(clusterName, gOpt, true) + return cm.EnableCluster(clusterName, gOpt, true) }, } diff --git a/components/cluster/command/exec.go b/components/cluster/command/exec.go index e9c8802afd..a4ea835968 100644 --- a/components/cluster/command/exec.go +++ b/components/cluster/command/exec.go @@ -14,12 +14,12 @@ package command import ( - "github.com/pingcap/tiup/pkg/cluster" + "github.com/pingcap/tiup/pkg/cluster/manager" "github.com/spf13/cobra" ) func newExecCmd() *cobra.Command { - opt := cluster.ExecOptions{} + opt := manager.ExecOptions{} cmd := &cobra.Command{ Use: "exec ", Short: "Run shell command on host in the tidb cluster", @@ -31,7 +31,7 @@ func newExecCmd() *cobra.Command { clusterName := args[0] teleCommand = append(teleCommand, scrubClusterName(clusterName)) - return manager.Exec(clusterName, opt, gOpt) + return cm.Exec(clusterName, opt, gOpt) }, } diff --git a/components/cluster/command/list.go b/components/cluster/command/list.go index 363badb33e..9fe8335cc4 100644 --- a/components/cluster/command/list.go +++ b/components/cluster/command/list.go @@ -22,7 +22,7 @@ func newListCmd() *cobra.Command { Use: "list", Short: "List all clusters", RunE: func(cmd *cobra.Command, args []string) error { - return manager.ListCluster() + return cm.ListCluster() }, } return cmd diff --git a/components/cluster/command/patch.go b/components/cluster/command/patch.go index dbbe4ed4fc..249006eef9 100644 --- a/components/cluster/command/patch.go +++ b/components/cluster/command/patch.go @@ -40,7 +40,7 @@ func newPatchCmd() *cobra.Command { clusterName := args[0] teleCommand = append(teleCommand, scrubClusterName(clusterName)) - return manager.Patch(clusterName, args[1], gOpt, overwrite) + return cm.Patch(clusterName, args[1], gOpt, overwrite) }, } diff --git a/components/cluster/command/prune.go b/components/cluster/command/prune.go index 7c9ae5682b..25f594e63e 100644 --- a/components/cluster/command/prune.go +++ b/components/cluster/command/prune.go @@ -28,7 +28,7 @@ func newPruneCmd() *cobra.Command { clusterName := args[0] - return manager.DestroyTombstone(clusterName, gOpt, skipConfirm) + return cm.DestroyTombstone(clusterName, gOpt, skipConfirm) }, } diff --git a/components/cluster/command/reload.go b/components/cluster/command/reload.go index 0d815e13e8..ea097722b3 100644 --- a/components/cluster/command/reload.go +++ b/components/cluster/command/reload.go @@ -36,7 +36,7 @@ func newReloadCmd() *cobra.Command { clusterName := args[0] teleCommand = append(teleCommand, scrubClusterName(clusterName)) - return manager.Reload(clusterName, gOpt, skipRestart) + return cm.Reload(clusterName, gOpt, skipRestart) }, } diff --git a/components/cluster/command/rename.go b/components/cluster/command/rename.go index e16838a9a3..68ece2590b 100644 --- a/components/cluster/command/rename.go +++ b/components/cluster/command/rename.go @@ -34,7 +34,7 @@ func newRenameCmd() *cobra.Command { newClusterName := args[1] teleCommand = append(teleCommand, scrubClusterName(oldClusterName)) - return manager.Rename(oldClusterName, gOpt, newClusterName) + return cm.Rename(oldClusterName, gOpt, newClusterName) }, } diff --git a/components/cluster/command/restart.go b/components/cluster/command/restart.go index 7f5b5491ff..ce5ea90be6 100644 --- 
a/components/cluster/command/restart.go +++ b/components/cluster/command/restart.go @@ -33,7 +33,7 @@ func newRestartCmd() *cobra.Command { clusterName := args[0] teleCommand = append(teleCommand, scrubClusterName(clusterName)) - return manager.RestartCluster(clusterName, gOpt) + return cm.RestartCluster(clusterName, gOpt) }, } diff --git a/components/cluster/command/root.go b/components/cluster/command/root.go index c51e29f3c2..6f4f5a64e8 100644 --- a/components/cluster/command/root.go +++ b/components/cluster/command/root.go @@ -25,9 +25,9 @@ import ( "github.com/google/uuid" "github.com/joomcode/errorx" "github.com/pingcap/tiup/pkg/cliutil" - "github.com/pingcap/tiup/pkg/cluster" "github.com/pingcap/tiup/pkg/cluster/executor" "github.com/pingcap/tiup/pkg/cluster/flags" + "github.com/pingcap/tiup/pkg/cluster/manager" operator "github.com/pingcap/tiup/pkg/cluster/operation" "github.com/pingcap/tiup/pkg/cluster/report" "github.com/pingcap/tiup/pkg/cluster/spec" @@ -52,7 +52,7 @@ var ( ) var tidbSpec *spec.SpecManager -var manager *cluster.Manager +var cm *manager.Manager func scrubClusterName(n string) string { return "cluster_" + telemetry.HashReport(n) @@ -100,7 +100,7 @@ func init() { } tidbSpec = spec.GetSpecManager() - manager = cluster.NewManager("tidb", tidbSpec, spec.TiDBComponentVersion) + cm = manager.NewManager("tidb", tidbSpec, spec.TiDBComponentVersion) logger.EnableAuditLog(spec.AuditDir()) // Running in other OS/ARCH Should be fine we only download manifest file. diff --git a/components/cluster/command/scale_in.go b/components/cluster/command/scale_in.go index e4083d4b91..91182278d3 100644 --- a/components/cluster/command/scale_in.go +++ b/components/cluster/command/scale_in.go @@ -47,16 +47,7 @@ func newScaleInCmd() *cobra.Command { UpdateTopology(clusterName, tidbSpec.Path(clusterName), metadata, nodes) } - return manager.ScaleIn( - clusterName, - skipConfirm, - gOpt.OptTimeout, - gOpt.SSHTimeout, - gOpt.SSHType, - gOpt.Force, - gOpt.Nodes, - scale, - ) + return cm.ScaleIn(clusterName, skipConfirm, gOpt, scale) }, } diff --git a/components/cluster/command/scale_out.go b/components/cluster/command/scale_out.go index 4e941962a5..a0edcf8195 100644 --- a/components/cluster/command/scale_out.go +++ b/components/cluster/command/scale_out.go @@ -17,8 +17,8 @@ import ( "io/ioutil" "path/filepath" - "github.com/pingcap/tiup/pkg/cluster" "github.com/pingcap/tiup/pkg/cluster/executor" + "github.com/pingcap/tiup/pkg/cluster/manager" "github.com/pingcap/tiup/pkg/cluster/spec" "github.com/pingcap/tiup/pkg/cluster/task" "github.com/pingcap/tiup/pkg/utils" @@ -26,7 +26,7 @@ import ( ) func newScaleOutCmd() *cobra.Command { - opt := cluster.ScaleOutOptions{ + opt := manager.ScaleOutOptions{ IdentityFile: filepath.Join(utils.UserHome(), ".ssh", "id_rsa"), } cmd := &cobra.Command{ @@ -51,16 +51,14 @@ func newScaleOutCmd() *cobra.Command { teleTopology = string(data) } - return manager.ScaleOut( + return cm.ScaleOut( clusterName, topoFile, postScaleOutHook, final, opt, skipConfirm, - gOpt.OptTimeout, - gOpt.SSHTimeout, - gOpt.SSHType, + gOpt, ) }, } diff --git a/components/cluster/command/start.go b/components/cluster/command/start.go index 1b1af10f22..43c5c8ef8c 100644 --- a/components/cluster/command/start.go +++ b/components/cluster/command/start.go @@ -35,7 +35,7 @@ func newStartCmd() *cobra.Command { clusterName := args[0] teleCommand = append(teleCommand, scrubClusterName(clusterName)) - return manager.StartCluster(clusterName, gOpt, func(b *task.Builder, metadata spec.Metadata) { + 
return cm.StartCluster(clusterName, gOpt, func(b *task.Builder, metadata spec.Metadata) { b.UpdateTopology( clusterName, tidbSpec.Path(clusterName), diff --git a/components/cluster/command/stop.go b/components/cluster/command/stop.go index 9a03e40d65..b38d81282b 100644 --- a/components/cluster/command/stop.go +++ b/components/cluster/command/stop.go @@ -33,7 +33,7 @@ func newStopCmd() *cobra.Command { clusterName := args[0] teleCommand = append(teleCommand, scrubClusterName(clusterName)) - return manager.StopCluster(clusterName, gOpt) + return cm.StopCluster(clusterName, gOpt) }, } diff --git a/components/cluster/command/upgrade.go b/components/cluster/command/upgrade.go index 66ce674315..e62738beb0 100644 --- a/components/cluster/command/upgrade.go +++ b/components/cluster/command/upgrade.go @@ -31,7 +31,7 @@ func newUpgradeCmd() *cobra.Command { teleCommand = append(teleCommand, scrubClusterName(clusterName)) teleCommand = append(teleCommand, version) - return manager.Upgrade(clusterName, version, gOpt, skipConfirm) + return cm.Upgrade(clusterName, version, gOpt, skipConfirm) }, } cmd.Flags().BoolVar(&gOpt.Force, "force", false, "Force upgrade without transferring PD leader") diff --git a/components/dm/command/deploy.go b/components/dm/command/deploy.go index ba15bb46ee..4e24f6af73 100644 --- a/components/dm/command/deploy.go +++ b/components/dm/command/deploy.go @@ -18,15 +18,15 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/tiup/pkg/cliutil" - "github.com/pingcap/tiup/pkg/cluster" "github.com/pingcap/tiup/pkg/cluster/executor" + "github.com/pingcap/tiup/pkg/cluster/manager" "github.com/pingcap/tiup/pkg/utils" "github.com/spf13/cobra" "golang.org/x/mod/semver" ) func newDeployCmd() *cobra.Command { - opt := cluster.DeployOptions{ + opt := manager.DeployOptions{ IdentityFile: path.Join(utils.UserHome(), ".ssh", "id_rsa"), } cmd := &cobra.Command{ @@ -56,17 +56,7 @@ func newDeployCmd() *cobra.Command { return err } - return manager.Deploy( - clusterName, - version, - topoFile, - opt, - nil, - skipConfirm, - gOpt.OptTimeout, - gOpt.SSHTimeout, - gOpt.SSHType, - ) + return cm.Deploy(clusterName, version, topoFile, opt, nil, skipConfirm, gOpt) }, } diff --git a/components/dm/command/destroy.go b/components/dm/command/destroy.go index 3571db677a..8d29c31082 100644 --- a/components/dm/command/destroy.go +++ b/components/dm/command/destroy.go @@ -30,7 +30,7 @@ func newDestroyCmd() *cobra.Command { clusterName := args[0] - return manager.DestroyCluster(clusterName, gOpt, operator.Options{}, skipConfirm) + return cm.DestroyCluster(clusterName, gOpt, operator.Options{}, skipConfirm) }, } diff --git a/components/dm/command/display.go b/components/dm/command/display.go index b0a183363c..47c3b464f4 100644 --- a/components/dm/command/display.go +++ b/components/dm/command/display.go @@ -31,7 +31,7 @@ func newDisplayCmd() *cobra.Command { clusterName = args[0] - return manager.Display(clusterName, gOpt) + return cm.Display(clusterName, gOpt) }, } diff --git a/components/dm/command/edit_config.go b/components/dm/command/edit_config.go index d5c2a467f1..1d3d3e5d59 100644 --- a/components/dm/command/edit_config.go +++ b/components/dm/command/edit_config.go @@ -28,7 +28,7 @@ func newEditConfigCmd() *cobra.Command { clusterName := args[0] - return manager.EditConfig(clusterName, skipConfirm) + return cm.EditConfig(clusterName, skipConfirm) }, } diff --git a/components/dm/command/exec.go b/components/dm/command/exec.go index 2c78bb2eb1..f5f9cc883b 100644 --- a/components/dm/command/exec.go +++ 
b/components/dm/command/exec.go @@ -14,12 +14,12 @@ package command import ( - "github.com/pingcap/tiup/pkg/cluster" + "github.com/pingcap/tiup/pkg/cluster/manager" "github.com/spf13/cobra" ) func newExecCmd() *cobra.Command { - opt := cluster.ExecOptions{} + opt := manager.ExecOptions{} cmd := &cobra.Command{ Use: "exec ", Short: "Run shell command on host in the dm cluster", @@ -30,7 +30,7 @@ func newExecCmd() *cobra.Command { clusterName := args[0] - return manager.Exec(clusterName, opt, gOpt) + return cm.Exec(clusterName, opt, gOpt) }, } diff --git a/components/dm/command/import.go b/components/dm/command/import.go index 12d321255a..f568ae6eae 100644 --- a/components/dm/command/import.go +++ b/components/dm/command/import.go @@ -21,8 +21,8 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/tiup/components/dm/ansible" "github.com/pingcap/tiup/pkg/cliutil" - "github.com/pingcap/tiup/pkg/cluster" cansible "github.com/pingcap/tiup/pkg/cluster/ansible" + "github.com/pingcap/tiup/pkg/cluster/manager" tiuputils "github.com/pingcap/tiup/pkg/utils" "github.com/spf13/cobra" "gopkg.in/yaml.v2" @@ -90,19 +90,17 @@ func newImportCmd() *cobra.Command { } } - err = manager.Deploy( + err = cm.Deploy( clusterName, clusterVersion, f.Name(), - cluster.DeployOptions{ + manager.DeployOptions{ IdentityFile: cansible.SSHKeyPath(), User: tiuputils.CurrentUser(), }, nil, skipConfirm, - gOpt.OptTimeout, - gOpt.SSHTimeout, - gOpt.SSHType, + gOpt, ) if err != nil { diff --git a/components/dm/command/list.go b/components/dm/command/list.go index 363badb33e..9fe8335cc4 100644 --- a/components/dm/command/list.go +++ b/components/dm/command/list.go @@ -22,7 +22,7 @@ func newListCmd() *cobra.Command { Use: "list", Short: "List all clusters", RunE: func(cmd *cobra.Command, args []string) error { - return manager.ListCluster() + return cm.ListCluster() }, } return cmd diff --git a/components/dm/command/patch.go b/components/dm/command/patch.go index 68a4182ddf..a4ec859b2c 100644 --- a/components/dm/command/patch.go +++ b/components/dm/command/patch.go @@ -40,7 +40,7 @@ func newPatchCmd() *cobra.Command { clusterName := args[0] - return manager.Patch(clusterName, args[1], gOpt, overwrite) + return cm.Patch(clusterName, args[1], gOpt, overwrite) }, } diff --git a/components/dm/command/reload.go b/components/dm/command/reload.go index f2a58c2b62..07a1b8b56f 100644 --- a/components/dm/command/reload.go +++ b/components/dm/command/reload.go @@ -35,7 +35,7 @@ func newReloadCmd() *cobra.Command { clusterName := args[0] - return manager.Reload(clusterName, gOpt, skipRestart) + return cm.Reload(clusterName, gOpt, skipRestart) }, } diff --git a/components/dm/command/restart.go b/components/dm/command/restart.go index 87cfd6be1c..ef19a3d774 100644 --- a/components/dm/command/restart.go +++ b/components/dm/command/restart.go @@ -28,7 +28,7 @@ func newRestartCmd() *cobra.Command { clusterName := args[0] - return manager.RestartCluster(clusterName, gOpt) + return cm.RestartCluster(clusterName, gOpt) }, } diff --git a/components/dm/command/root.go b/components/dm/command/root.go index 8b651ea951..22b461b01d 100644 --- a/components/dm/command/root.go +++ b/components/dm/command/root.go @@ -22,9 +22,9 @@ import ( "github.com/joomcode/errorx" "github.com/pingcap/tiup/components/dm/spec" "github.com/pingcap/tiup/pkg/cliutil" - "github.com/pingcap/tiup/pkg/cluster" "github.com/pingcap/tiup/pkg/cluster/executor" "github.com/pingcap/tiup/pkg/cluster/flags" + "github.com/pingcap/tiup/pkg/cluster/manager" operator 
"github.com/pingcap/tiup/pkg/cluster/operation" cspec "github.com/pingcap/tiup/pkg/cluster/spec" "github.com/pingcap/tiup/pkg/colorutil" @@ -47,7 +47,7 @@ var ( ) var dmspec *cspec.SpecManager -var manager *cluster.Manager +var cm *manager.Manager func init() { logger.InitGlobalLogger() @@ -78,7 +78,7 @@ func init() { dmspec = spec.GetSpecManager() logger.EnableAuditLog(cspec.AuditDir()) - manager = cluster.NewManager("dm", dmspec, spec.DMComponentVersion) + cm = manager.NewManager("dm", dmspec, spec.DMComponentVersion) // Running in other OS/ARCH Should be fine we only download manifest file. env, err = tiupmeta.InitEnv(repository.Options{ diff --git a/components/dm/command/scale_in.go b/components/dm/command/scale_in.go index 887a6ee8bb..0b9104e84f 100644 --- a/components/dm/command/scale_in.go +++ b/components/dm/command/scale_in.go @@ -51,16 +51,7 @@ func newScaleInCmd() *cobra.Command { ).Serial(dmtask.NewUpdateDMMeta(clusterName, metadata, gOpt.Nodes)) } - return manager.ScaleIn( - clusterName, - skipConfirm, - gOpt.OptTimeout, - gOpt.SSHTimeout, - gOpt.SSHType, - gOpt.Force, - gOpt.Nodes, - scale, - ) + return cm.ScaleIn(clusterName, skipConfirm, gOpt, scale) }, } diff --git a/components/dm/command/scale_out.go b/components/dm/command/scale_out.go index 1493461814..18d8b0a400 100644 --- a/components/dm/command/scale_out.go +++ b/components/dm/command/scale_out.go @@ -16,14 +16,14 @@ package command import ( "path/filepath" - "github.com/pingcap/tiup/pkg/cluster" "github.com/pingcap/tiup/pkg/cluster/executor" + "github.com/pingcap/tiup/pkg/cluster/manager" "github.com/pingcap/tiup/pkg/utils" "github.com/spf13/cobra" ) func newScaleOutCmd() *cobra.Command { - opt := cluster.ScaleOutOptions{ + opt := manager.ScaleOutOptions{ IdentityFile: filepath.Join(utils.UserHome(), ".ssh", "id_rsa"), } cmd := &cobra.Command{ @@ -43,17 +43,7 @@ func newScaleOutCmd() *cobra.Command { clusterName := args[0] topoFile := args[1] - return manager.ScaleOut( - clusterName, - topoFile, - nil, - nil, - opt, - skipConfirm, - gOpt.OptTimeout, - gOpt.SSHTimeout, - gOpt.SSHType, - ) + return cm.ScaleOut(clusterName, topoFile, nil, nil, opt, skipConfirm, gOpt) }, } diff --git a/components/dm/command/start.go b/components/dm/command/start.go index c3ad9cb44c..1b471800a1 100644 --- a/components/dm/command/start.go +++ b/components/dm/command/start.go @@ -28,7 +28,7 @@ func newStartCmd() *cobra.Command { clusterName := args[0] - return manager.StartCluster(clusterName, gOpt) + return cm.StartCluster(clusterName, gOpt) }, } diff --git a/components/dm/command/stop.go b/components/dm/command/stop.go index 14cb3961a7..2eeb12389f 100644 --- a/components/dm/command/stop.go +++ b/components/dm/command/stop.go @@ -28,7 +28,7 @@ func newStopCmd() *cobra.Command { clusterName := args[0] - return manager.StopCluster(clusterName, gOpt) + return cm.StopCluster(clusterName, gOpt) }, } diff --git a/components/dm/command/upgrade.go b/components/dm/command/upgrade.go index 9743954e17..f83006c4b4 100644 --- a/components/dm/command/upgrade.go +++ b/components/dm/command/upgrade.go @@ -26,7 +26,7 @@ func newUpgradeCmd() *cobra.Command { return cmd.Help() } - return manager.Upgrade(args[0], args[1], gOpt, skipConfirm) + return cm.Upgrade(args[0], args[1], gOpt, skipConfirm) }, } diff --git a/pkg/cluster/manager.go b/pkg/cluster/manager.go deleted file mode 100644 index 4ac4dc37eb..0000000000 --- a/pkg/cluster/manager.go +++ /dev/null @@ -1,2510 +0,0 @@ -// Copyright 2020 PingCAP, Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package cluster - -import ( - "bytes" - "crypto/tls" - "crypto/x509" - "encoding/pem" - "errors" - "fmt" - "io" - "io/ioutil" - "os" - "os/exec" - "path" - "path/filepath" - "sort" - "strings" - "time" - - "github.com/fatih/color" - "github.com/joomcode/errorx" - perrs "github.com/pingcap/errors" - "github.com/pingcap/tiup/pkg/cliutil" - "github.com/pingcap/tiup/pkg/cluster/api" - "github.com/pingcap/tiup/pkg/cluster/clusterutil" - "github.com/pingcap/tiup/pkg/cluster/executor" - operator "github.com/pingcap/tiup/pkg/cluster/operation" - "github.com/pingcap/tiup/pkg/cluster/spec" - "github.com/pingcap/tiup/pkg/cluster/task" - "github.com/pingcap/tiup/pkg/crypto" - "github.com/pingcap/tiup/pkg/environment" - "github.com/pingcap/tiup/pkg/errutil" - "github.com/pingcap/tiup/pkg/file" - "github.com/pingcap/tiup/pkg/logger/log" - "github.com/pingcap/tiup/pkg/meta" - pkgver "github.com/pingcap/tiup/pkg/repository/version" - "github.com/pingcap/tiup/pkg/set" - "github.com/pingcap/tiup/pkg/utils" - "github.com/pingcap/tiup/pkg/version" - "golang.org/x/mod/semver" - "gopkg.in/yaml.v2" -) - -var ( - errNSDeploy = errorx.NewNamespace("deploy") - errDeployNameDuplicate = errNSDeploy.NewType("name_dup", errutil.ErrTraitPreCheck) - - errNSRename = errorx.NewNamespace("rename") - errorRenameNameNotExist = errNSRename.NewType("name_not_exist", errutil.ErrTraitPreCheck) - errorRenameNameDuplicate = errNSRename.NewType("name_dup", errutil.ErrTraitPreCheck) -) - -// Manager to deploy a cluster. -type Manager struct { - sysName string - specManager *spec.SpecManager - bindVersion spec.BindVersion -} - -// NewManager create a Manager. -func NewManager(sysName string, specManager *spec.SpecManager, bindVersion spec.BindVersion) *Manager { - return &Manager{ - sysName: sysName, - specManager: specManager, - bindVersion: bindVersion, - } -} - -// EnableCluster enable/disable the service in a cluster -func (m *Manager) EnableCluster(name string, options operator.Options, isEnable bool) error { - if isEnable { - log.Infof("Enabling cluster %s...", name) - } else { - log.Infof("Disabling cluster %s...", name) - } - - metadata, err := m.meta(name) - if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) { - return perrs.AddStack(err) - } - - topo := metadata.GetTopology() - base := metadata.GetBaseMeta() - - b := task.NewBuilder(). - SSHKeySet( - m.specManager.Path(name, "ssh", "id_rsa"), - m.specManager.Path(name, "ssh", "id_rsa.pub")). - ClusterSSH(topo, base.User, options.SSHTimeout, options.SSHType, topo.BaseTopo().GlobalOptions.SSHType) - - if isEnable { - b = b.Func("EnableCluster", func(ctx *task.Context) error { - return operator.Enable(ctx, topo, options, isEnable) - }) - } else { - b = b.Func("DisableCluster", func(ctx *task.Context) error { - return operator.Enable(ctx, topo, options, isEnable) - }) - } - - t := b.Build() - - if err := t.Execute(task.NewContext()); err != nil { - if errorx.Cast(err) != nil { - // FIXME: Map possible task errors and give suggestions. 
- return err - } - return perrs.Trace(err) - } - - if isEnable { - log.Infof("Enabled cluster `%s` successfully", name) - } else { - log.Infof("Disabled cluster `%s` successfully", name) - } - - return nil -} - -// StartCluster start the cluster with specified name. -func (m *Manager) StartCluster(name string, options operator.Options, fn ...func(b *task.Builder, metadata spec.Metadata)) error { - log.Infof("Starting cluster %s...", name) - - metadata, err := m.meta(name) - if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) { - return perrs.AddStack(err) - } - - topo := metadata.GetTopology() - base := metadata.GetBaseMeta() - - tlsCfg, err := topo.TLSConfig(m.specManager.Path(name, spec.TLSCertKeyDir)) - if err != nil { - return perrs.AddStack(err) - } - - b := task.NewBuilder(). - SSHKeySet( - m.specManager.Path(name, "ssh", "id_rsa"), - m.specManager.Path(name, "ssh", "id_rsa.pub")). - ClusterSSH(topo, base.User, options.SSHTimeout, options.SSHType, topo.BaseTopo().GlobalOptions.SSHType). - Func("StartCluster", func(ctx *task.Context) error { - return operator.Start(ctx, topo, options, tlsCfg) - }) - - for _, f := range fn { - f(b, metadata) - } - - t := b.Build() - - if err := t.Execute(task.NewContext()); err != nil { - if errorx.Cast(err) != nil { - // FIXME: Map possible task errors and give suggestions. - return err - } - return perrs.Trace(err) - } - - log.Infof("Started cluster `%s` successfully", name) - return nil -} - -// StopCluster stop the cluster. -func (m *Manager) StopCluster(clusterName string, options operator.Options) error { - metadata, err := m.meta(clusterName) - if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) { - return perrs.AddStack(err) - } - - topo := metadata.GetTopology() - base := metadata.GetBaseMeta() - - tlsCfg, err := topo.TLSConfig(m.specManager.Path(clusterName, spec.TLSCertKeyDir)) - if err != nil { - return err - } - - t := task.NewBuilder(). - SSHKeySet( - m.specManager.Path(clusterName, "ssh", "id_rsa"), - m.specManager.Path(clusterName, "ssh", "id_rsa.pub")). - ClusterSSH(metadata.GetTopology(), base.User, options.SSHTimeout, options.SSHType, topo.BaseTopo().GlobalOptions.SSHType). - Func("StopCluster", func(ctx *task.Context) error { - return operator.Stop(ctx, topo, options, tlsCfg) - }). - Build() - - if err := t.Execute(task.NewContext()); err != nil { - if errorx.Cast(err) != nil { - // FIXME: Map possible task errors and give suggestions. - return err - } - return perrs.Trace(err) - } - - log.Infof("Stopped cluster `%s` successfully", clusterName) - return nil -} - -// RestartCluster restart the cluster. -func (m *Manager) RestartCluster(clusterName string, options operator.Options) error { - metadata, err := m.meta(clusterName) - if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) { - return perrs.AddStack(err) - } - - topo := metadata.GetTopology() - base := metadata.GetBaseMeta() - - tlsCfg, err := topo.TLSConfig(m.specManager.Path(clusterName, spec.TLSCertKeyDir)) - if err != nil { - return err - } - - t := task.NewBuilder(). - SSHKeySet( - m.specManager.Path(clusterName, "ssh", "id_rsa"), - m.specManager.Path(clusterName, "ssh", "id_rsa.pub")). - ClusterSSH(topo, base.User, options.SSHTimeout, options.SSHType, topo.BaseTopo().GlobalOptions.SSHType). - Func("RestartCluster", func(ctx *task.Context) error { - return operator.Restart(ctx, topo, options, tlsCfg) - }). 
- Build() - - if err := t.Execute(task.NewContext()); err != nil { - if errorx.Cast(err) != nil { - // FIXME: Map possible task errors and give suggestions. - return err - } - return perrs.Trace(err) - } - - log.Infof("Restarted cluster `%s` successfully", clusterName) - return nil -} - -// ListCluster list the clusters. -func (m *Manager) ListCluster() error { - names, err := m.specManager.List() - if err != nil { - return perrs.AddStack(err) - } - - clusterTable := [][]string{ - // Header - {"Name", "User", "Version", "Path", "PrivateKey"}, - } - - for _, name := range names { - metadata, err := m.meta(name) - if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) && - !errors.Is(perrs.Cause(err), spec.ErrNoTiSparkMaster) { - return perrs.Trace(err) - } - - base := metadata.GetBaseMeta() - - clusterTable = append(clusterTable, []string{ - name, - base.User, - base.Version, - m.specManager.Path(name), - m.specManager.Path(name, "ssh", "id_rsa"), - }) - } - - cliutil.PrintTable(clusterTable, true) - return nil -} - -// CleanCluster clean the cluster without destroying it -func (m *Manager) CleanCluster(clusterName string, gOpt operator.Options, cleanOpt operator.Options, skipConfirm bool) error { - metadata, err := m.meta(clusterName) - if err != nil { - return err - } - - topo := metadata.GetTopology() - base := metadata.GetBaseMeta() - - tlsCfg, err := topo.TLSConfig(m.specManager.Path(clusterName, spec.TLSCertKeyDir)) - if err != nil { - return err - } - - if !skipConfirm { - target := "" - switch { - case cleanOpt.CleanupData && cleanOpt.CleanupLog: - target = "data and log" - case cleanOpt.CleanupData: - target = "data" - case cleanOpt.CleanupLog: - target = "log" - } - if err := cliutil.PromptForConfirmOrAbortError( - "This operation will clean %s %s cluster %s's %s.\nNodes will be ignored: %s\nRoles will be ignored: %s\nDo you want to continue? [y/N]:", - m.sysName, - color.HiYellowString(base.Version), - color.HiYellowString(clusterName), - target, - cleanOpt.RetainDataNodes, - cleanOpt.RetainDataRoles); err != nil { - return err - } - log.Infof("Cleanup cluster...") - } - - t := task.NewBuilder(). - SSHKeySet( - m.specManager.Path(clusterName, "ssh", "id_rsa"), - m.specManager.Path(clusterName, "ssh", "id_rsa.pub")). - ClusterSSH(topo, base.User, gOpt.SSHTimeout, gOpt.SSHType, topo.BaseTopo().GlobalOptions.SSHType). - Func("StopCluster", func(ctx *task.Context) error { - return operator.Stop(ctx, topo, operator.Options{}, tlsCfg) - }). - Func("CleanupCluster", func(ctx *task.Context) error { - return operator.Cleanup(ctx, topo, cleanOpt) - }). - Build() - - if err := t.Execute(task.NewContext()); err != nil { - if errorx.Cast(err) != nil { - // FIXME: Map possible task errors and give suggestions. - return err - } - return perrs.Trace(err) - } - - log.Infof("Cleanup cluster `%s` successfully", clusterName) - return nil -} - -// DestroyCluster destroy the cluster. 
-func (m *Manager) DestroyCluster(clusterName string, gOpt operator.Options, destroyOpt operator.Options, skipConfirm bool) error { - metadata, err := m.meta(clusterName) - if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) && - !errors.Is(perrs.Cause(err), spec.ErrNoTiSparkMaster) && - !errors.Is(perrs.Cause(err), spec.ErrMultipleTiSparkMaster) && - !errors.Is(perrs.Cause(err), spec.ErrMultipleTisparkWorker) { - return perrs.AddStack(err) - } - - topo := metadata.GetTopology() - base := metadata.GetBaseMeta() - - tlsCfg, err := topo.TLSConfig(m.specManager.Path(clusterName, spec.TLSCertKeyDir)) - if err != nil { - return err - } - - if !skipConfirm { - if err := cliutil.PromptForConfirmOrAbortError( - "This operation will destroy %s %s cluster %s and its data.\nDo you want to continue? [y/N]:", - m.sysName, - color.HiYellowString(base.Version), - color.HiYellowString(clusterName)); err != nil { - return err - } - log.Infof("Destroying cluster...") - } - - t := task.NewBuilder(). - SSHKeySet( - m.specManager.Path(clusterName, "ssh", "id_rsa"), - m.specManager.Path(clusterName, "ssh", "id_rsa.pub")). - ClusterSSH(topo, base.User, gOpt.SSHTimeout, gOpt.SSHType, topo.BaseTopo().GlobalOptions.SSHType). - Func("StopCluster", func(ctx *task.Context) error { - return operator.Stop(ctx, topo, operator.Options{ - Force: destroyOpt.Force, - }, tlsCfg) - }). - Func("DestroyCluster", func(ctx *task.Context) error { - return operator.Destroy(ctx, topo, destroyOpt) - }). - Build() - - if err := t.Execute(task.NewContext()); err != nil { - if errorx.Cast(err) != nil { - // FIXME: Map possible task errors and give suggestions. - return err - } - return perrs.Trace(err) - } - - if err := m.specManager.Remove(clusterName); err != nil { - return perrs.Trace(err) - } - - log.Infof("Destroyed cluster `%s` successfully", clusterName) - return nil -} - -// ExecOptions for exec shell commanm. -type ExecOptions struct { - Command string - Sudo bool -} - -// Exec shell command on host in the tidb cluster. -func (m *Manager) Exec(clusterName string, opt ExecOptions, gOpt operator.Options) error { - metadata, err := m.meta(clusterName) - if err != nil { - return perrs.AddStack(err) - } - - topo := metadata.GetTopology() - base := metadata.GetBaseMeta() - - filterRoles := set.NewStringSet(gOpt.Roles...) - filterNodes := set.NewStringSet(gOpt.Nodes...) - - var shellTasks []task.Task - uniqueHosts := map[string]int{} // host -> ssh-port - topo.IterInstance(func(inst spec.Instance) { - if _, found := uniqueHosts[inst.GetHost()]; !found { - if len(gOpt.Roles) > 0 && !filterRoles.Exist(inst.Role()) { - return - } - - if len(gOpt.Nodes) > 0 && !filterNodes.Exist(inst.GetHost()) { - return - } - - uniqueHosts[inst.GetHost()] = inst.GetSSHPort() - } - }) - - for host := range uniqueHosts { - shellTasks = append(shellTasks, - task.NewBuilder(). - Shell(host, opt.Command, opt.Sudo). - Build()) - } - - t := task.NewBuilder(). - SSHKeySet( - m.specManager.Path(clusterName, "ssh", "id_rsa"), - m.specManager.Path(clusterName, "ssh", "id_rsa.pub")). - ClusterSSH(topo, base.User, gOpt.SSHTimeout, gOpt.SSHType, topo.BaseTopo().GlobalOptions.SSHType). - Parallel(false, shellTasks...). - Build() - - execCtx := task.NewContext() - if err := t.Execute(execCtx); err != nil { - if errorx.Cast(err) != nil { - // FIXME: Map possible task errors and give suggestions. 
- return err - } - return perrs.Trace(err) - } - - // print outputs - for host := range uniqueHosts { - stdout, stderr, ok := execCtx.GetOutputs(host) - if !ok { - continue - } - log.Infof("Outputs of %s on %s:", - color.CyanString(opt.Command), - color.CyanString(host)) - if len(stdout) > 0 { - log.Infof("%s:\n%s", color.GreenString("stdout"), stdout) - } - if len(stderr) > 0 { - log.Infof("%s:\n%s", color.RedString("stderr"), stderr) - } - } - - return nil -} - -// Display cluster meta and topology. -func (m *Manager) Display(clusterName string, opt operator.Options) error { - metadata, err := m.meta(clusterName) - if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) && - !errors.Is(perrs.Cause(err), spec.ErrNoTiSparkMaster) { - return perrs.AddStack(err) - } - - topo := metadata.GetTopology() - base := metadata.GetBaseMeta() - // display cluster meta - cyan := color.New(color.FgCyan, color.Bold) - fmt.Printf("Cluster type: %s\n", cyan.Sprint(m.sysName)) - fmt.Printf("Cluster name: %s\n", cyan.Sprint(clusterName)) - fmt.Printf("Cluster version: %s\n", cyan.Sprint(base.Version)) - fmt.Printf("SSH type: %s\n", cyan.Sprint(topo.BaseTopo().GlobalOptions.SSHType)) - - // display TLS info - if topo.BaseTopo().GlobalOptions.TLSEnabled { - fmt.Printf("TLS encryption: %s\n", cyan.Sprint("enabled")) - fmt.Printf("CA certificate: %s\n", cyan.Sprint( - m.specManager.Path(clusterName, spec.TLSCertKeyDir, spec.TLSCACert), - )) - fmt.Printf("Client private key: %s\n", cyan.Sprint( - m.specManager.Path(clusterName, spec.TLSCertKeyDir, spec.TLSClientKey), - )) - fmt.Printf("Client certificate: %s\n", cyan.Sprint( - m.specManager.Path(clusterName, spec.TLSCertKeyDir, spec.TLSClientCert), - )) - } - - // display topology - clusterTable := [][]string{ - // Header - {"ID", "Role", "Host", "Ports", "OS/Arch", "Status", "Data Dir", "Deploy Dir"}, - } - - ctx := task.NewContext() - err = ctx.SetSSHKeySet(m.specManager.Path(clusterName, "ssh", "id_rsa"), - m.specManager.Path(clusterName, "ssh", "id_rsa.pub")) - if err != nil { - return perrs.AddStack(err) - } - - err = ctx.SetClusterSSH(topo, base.User, opt.SSHTimeout, opt.SSHType, topo.BaseTopo().GlobalOptions.SSHType) - if err != nil { - return perrs.AddStack(err) - } - - filterRoles := set.NewStringSet(opt.Roles...) - filterNodes := set.NewStringSet(opt.Nodes...) - pdList := topo.BaseTopo().MasterList - tlsCfg, err := topo.TLSConfig(m.specManager.Path(clusterName, spec.TLSCertKeyDir)) - if err != nil { - return err - } - for _, comp := range topo.ComponentsByStartOrder() { - for _, ins := range comp.Instances() { - // apply role filter - if len(filterRoles) > 0 && !filterRoles.Exist(ins.Role()) { - continue - } - // apply node filter - if len(filterNodes) > 0 && !filterNodes.Exist(ins.ID()) { - continue - } - - dataDir := "-" - insDirs := ins.UsedDirs() - deployDir := insDirs[0] - if len(insDirs) > 1 { - dataDir = insDirs[1] - } - - status := ins.Status(tlsCfg, pdList...) 
- // Query the service status - if status == "-" { - e, found := ctx.GetExecutor(ins.GetHost()) - if found { - active, _ := operator.GetServiceStatus(e, ins.ServiceName()) - if parts := strings.Split(strings.TrimSpace(active), " "); len(parts) > 2 { - if parts[1] == "active" { - status = "Up" - } else { - status = parts[1] - } - } - } - } - clusterTable = append(clusterTable, []string{ - color.CyanString(ins.ID()), - ins.Role(), - ins.GetHost(), - utils.JoinInt(ins.UsedPorts(), "/"), - cliutil.OsArch(ins.OS(), ins.Arch()), - formatInstanceStatus(status), - dataDir, - deployDir, - }) - } - } - - // Sort by role,host,ports - sort.Slice(clusterTable[1:], func(i, j int) bool { - lhs, rhs := clusterTable[i+1], clusterTable[j+1] - // column: 1 => role, 2 => host, 3 => ports - for _, col := range []int{1, 2} { - if lhs[col] != rhs[col] { - return lhs[col] < rhs[col] - } - } - return lhs[3] < rhs[3] - }) - - cliutil.PrintTable(clusterTable, true) - fmt.Printf("Total nodes: %d\n", len(clusterTable)-1) - - if t, ok := topo.(*spec.Specification); ok { - // Check if TiKV's label set correctly - pdClient := api.NewPDClient(pdList, 10*time.Second, tlsCfg) - if lbs, err := pdClient.GetLocationLabels(); err != nil { - log.Debugf("get location labels from pd failed: %v", err) - } else if err := spec.CheckTiKVLabels(lbs, pdClient); err != nil { - color.Yellow("\nWARN: there is something wrong with TiKV labels, which may cause data losing:\n%v", err) - } - - // Check if there is some instance in tombstone state - nodes, _ := operator.DestroyTombstone(ctx, t, true /* returnNodesOnly */, opt, tlsCfg) - if len(nodes) != 0 { - color.Green("There are some nodes can be pruned: \n\tNodes: %+v\n\tYou can destroy them with the command: `tiup cluster prune %s`", nodes, clusterName) - } - } - - return nil -} - -// EditConfig let the user edit the config. -func (m *Manager) EditConfig(clusterName string, skipConfirm bool) error { - metadata, err := m.meta(clusterName) - if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) { - return perrs.AddStack(err) - } - - topo := metadata.GetTopology() - - data, err := yaml.Marshal(topo) - if err != nil { - return perrs.AddStack(err) - } - - newTopo, err := m.editTopo(topo, data, skipConfirm) - if err != nil { - return perrs.AddStack(err) - } - - if newTopo == nil { - return nil - } - - log.Infof("Apply the change...") - metadata.SetTopology(newTopo) - err = m.specManager.SaveMeta(clusterName, metadata) - if err != nil { - return perrs.Annotate(err, "failed to save meta") - } - - log.Infof("Apply change successfully, please use `%s reload %s [-N ] [-R ]` to reload config.", cliutil.OsArgs0(), clusterName) - return nil -} - -// Rename the cluster -func (m *Manager) Rename(clusterName string, opt operator.Options, newName string) error { - if !utils.IsExist(m.specManager.Path(clusterName)) { - return errorRenameNameNotExist. - New("Cluster name '%s' not exist", clusterName). - WithProperty(cliutil.SuggestionFromFormat("Please double check your cluster name")) - } - if utils.IsExist(m.specManager.Path(newName)) { - return errorRenameNameDuplicate. - New("Cluster name '%s' is duplicated", newName). 
- WithProperty(cliutil.SuggestionFromFormat("Please specify another cluster name")) - } - - _, err := m.meta(clusterName) - if err != nil { // refuse renaming if current cluster topology is not valid - return perrs.AddStack(err) - } - - if err := os.Rename(m.specManager.Path(clusterName), m.specManager.Path(newName)); err != nil { - return perrs.AddStack(err) - } - - log.Infof("Rename cluster `%s` -> `%s` successfully", clusterName, newName) - - opt.Roles = []string{spec.ComponentGrafana, spec.ComponentPrometheus} - return m.Reload(newName, opt, false) -} - -// Reload the cluster. -func (m *Manager) Reload(clusterName string, opt operator.Options, skipRestart bool) error { - sshTimeout := opt.SSHTimeout - - metadata, err := m.meta(clusterName) - if err != nil { - return perrs.AddStack(err) - } - - topo := metadata.GetTopology() - base := metadata.GetBaseMeta() - - var refreshConfigTasks []*task.StepDisplay - - hasImported := false - uniqueHosts := make(map[string]hostInfo) // host -> ssh-port, os, arch - - topo.IterInstance(func(inst spec.Instance) { - if _, found := uniqueHosts[inst.GetHost()]; !found { - uniqueHosts[inst.GetHost()] = hostInfo{ - ssh: inst.GetSSHPort(), - os: inst.OS(), - arch: inst.Arch(), - } - } - - deployDir := spec.Abs(base.User, inst.DeployDir()) - // data dir would be empty for components which don't need it - dataDirs := spec.MultiDirAbs(base.User, inst.DataDir()) - // log dir will always be with values, but might not used by the component - logDir := spec.Abs(base.User, inst.LogDir()) - - // Download and copy the latest component to remote if the cluster is imported from Ansible - tb := task.NewBuilder().UserSSH(inst.GetHost(), inst.GetSSHPort(), base.User, opt.SSHTimeout, opt.SSHType, topo.BaseTopo().GlobalOptions.SSHType) - if inst.IsImported() { - switch compName := inst.ComponentName(); compName { - case spec.ComponentGrafana, spec.ComponentPrometheus, spec.ComponentAlertmanager: - version := m.bindVersion(compName, base.Version) - tb.Download(compName, inst.OS(), inst.Arch(), version). - CopyComponent(compName, inst.OS(), inst.Arch(), version, "", inst.GetHost(), deployDir) - } - hasImported = true - } - - // Refresh all configuration - t := tb.InitConfig(clusterName, - base.Version, - m.specManager, - inst, base.User, - opt.IgnoreConfigCheck, - meta.DirPaths{ - Deploy: deployDir, - Data: dataDirs, - Log: logDir, - Cache: m.specManager.Path(clusterName, spec.TempConfigPath), - }). - BuildAsStep(fmt.Sprintf(" - Refresh config %s -> %s", inst.ComponentName(), inst.ID())) - refreshConfigTasks = append(refreshConfigTasks, t) - }) - - monitorConfigTasks := refreshMonitoredConfigTask( - m.specManager, - clusterName, - uniqueHosts, - *topo.BaseTopo().GlobalOptions, - topo.GetMonitoredOptions(), - sshTimeout, - opt.SSHType) - - // handle dir scheme changes - if hasImported { - if err := spec.HandleImportPathMigration(clusterName); err != nil { - return perrs.AddStack(err) - } - } - - tb := task.NewBuilder(). - SSHKeySet( - m.specManager.Path(clusterName, "ssh", "id_rsa"), - m.specManager.Path(clusterName, "ssh", "id_rsa.pub")). - ClusterSSH(topo, base.User, opt.SSHTimeout, opt.SSHType, topo.BaseTopo().GlobalOptions.SSHType). - ParallelStep("+ Refresh instance configs", opt.Force, refreshConfigTasks...) - - if len(monitorConfigTasks) > 0 { - tb = tb.ParallelStep("+ Refresh monitor configs", opt.Force, monitorConfigTasks...) 
- } - - tlsCfg, err := topo.TLSConfig(m.specManager.Path(clusterName, spec.TLSCertKeyDir)) - if err != nil { - return err - } - if !skipRestart { - tb = tb.Func("UpgradeCluster", func(ctx *task.Context) error { - return operator.Upgrade(ctx, topo, opt, tlsCfg) - }) - } - - t := tb.Build() - - if err := t.Execute(task.NewContext()); err != nil { - if errorx.Cast(err) != nil { - // FIXME: Map possible task errors and give suggestions. - return err - } - return perrs.Trace(err) - } - - log.Infof("Reloaded cluster `%s` successfully", clusterName) - - return nil -} - -// Upgrade the cluster. -func (m *Manager) Upgrade(clusterName string, clusterVersion string, opt operator.Options, skipConfirm bool) error { - metadata, err := m.meta(clusterName) - if err != nil { - return perrs.AddStack(err) - } - - topo := metadata.GetTopology() - base := metadata.GetBaseMeta() - - var ( - downloadCompTasks []task.Task // tasks which are used to download components - copyCompTasks []task.Task // tasks which are used to copy components to remote host - - uniqueComps = map[string]struct{}{} - ) - - if err := versionCompare(base.Version, clusterVersion); err != nil { - return err - } - - if !skipConfirm { - if err := cliutil.PromptForConfirmOrAbortError( - "This operation will upgrade %s %s cluster %s to %s.\nDo you want to continue? [y/N]:", - m.sysName, - color.HiYellowString(base.Version), - color.HiYellowString(clusterName), - color.HiYellowString(clusterVersion)); err != nil { - return err - } - log.Infof("Upgrading cluster...") - } - - hasImported := false - for _, comp := range topo.ComponentsByUpdateOrder() { - for _, inst := range comp.Instances() { - version := m.bindVersion(inst.ComponentName(), clusterVersion) - compInfo := componentInfo{ - component: inst.ComponentName(), - version: version, - } - - // Download component from repository - key := fmt.Sprintf("%s-%s-%s-%s", compInfo.component, compInfo.version, inst.OS(), inst.Arch()) - if _, found := uniqueComps[key]; !found { - uniqueComps[key] = struct{}{} - t := task.NewBuilder(). - Download(inst.ComponentName(), inst.OS(), inst.Arch(), version). 
- Build() - downloadCompTasks = append(downloadCompTasks, t) - } - - deployDir := spec.Abs(base.User, inst.DeployDir()) - // data dir would be empty for components which don't need it - dataDirs := spec.MultiDirAbs(base.User, inst.DataDir()) - // log dir will always be with values, but might not used by the component - logDir := spec.Abs(base.User, inst.LogDir()) - - // Deploy component - tb := task.NewBuilder() - if inst.IsImported() { - switch inst.ComponentName() { - case spec.ComponentPrometheus, spec.ComponentGrafana, spec.ComponentAlertmanager: - tb.CopyComponent( - inst.ComponentName(), - inst.OS(), - inst.Arch(), - version, - "", // use default srcPath - inst.GetHost(), - deployDir, - ) - } - hasImported = true - } - - // backup files of the old version - tb = tb.BackupComponent(inst.ComponentName(), base.Version, inst.GetHost(), deployDir) - - if deployerInstance, ok := inst.(DeployerInstance); ok { - deployerInstance.Deploy(tb, "", deployDir, version, clusterName, clusterVersion) - } else { - // copy dependency component if needed - switch inst.ComponentName() { - case spec.ComponentTiSpark: - env := environment.GlobalEnv() - sparkVer, _, err := env.V1Repository().LatestStableVersion(spec.ComponentSpark, false) - if err != nil { - return err - } - tb = tb.DeploySpark(inst, sparkVer.String(), "" /* default srcPath */, deployDir) - default: - tb = tb.CopyComponent( - inst.ComponentName(), - inst.OS(), - inst.Arch(), - version, - "", // use default srcPath - inst.GetHost(), - deployDir, - ) - } - } - - tb.InitConfig( - clusterName, - clusterVersion, - m.specManager, - inst, - base.User, - opt.IgnoreConfigCheck, - meta.DirPaths{ - Deploy: deployDir, - Data: dataDirs, - Log: logDir, - Cache: m.specManager.Path(clusterName, spec.TempConfigPath), - }, - ) - copyCompTasks = append(copyCompTasks, tb.Build()) - } - } - - // handle dir scheme changes - if hasImported { - if err := spec.HandleImportPathMigration(clusterName); err != nil { - return err - } - } - - tlsCfg, err := topo.TLSConfig(m.specManager.Path(clusterName, spec.TLSCertKeyDir)) - if err != nil { - return err - } - t := task.NewBuilder(). - SSHKeySet( - m.specManager.Path(clusterName, "ssh", "id_rsa"), - m.specManager.Path(clusterName, "ssh", "id_rsa.pub")). - ClusterSSH(topo, base.User, opt.SSHTimeout, opt.SSHType, topo.BaseTopo().GlobalOptions.SSHType). - Parallel(false, downloadCompTasks...). - Parallel(opt.Force, copyCompTasks...). - Func("UpgradeCluster", func(ctx *task.Context) error { - return operator.Upgrade(ctx, topo, opt, tlsCfg) - }). - Build() - - if err := t.Execute(task.NewContext()); err != nil { - if errorx.Cast(err) != nil { - // FIXME: Map possible task errors and give suggestions. - return err - } - return perrs.Trace(err) - } - - metadata.SetVersion(clusterVersion) - - if err := m.specManager.SaveMeta(clusterName, metadata); err != nil { - return perrs.Trace(err) - } - - if err := os.RemoveAll(m.specManager.Path(clusterName, "patch")); err != nil { - return perrs.Trace(err) - } - - log.Infof("Upgraded cluster `%s` successfully", clusterName) - - return nil -} - -// Patch the cluster. 
-func (m *Manager) Patch(clusterName string, packagePath string, opt operator.Options, overwrite bool) error { - metadata, err := m.meta(clusterName) - if err != nil { - return perrs.AddStack(err) - } - - topo := metadata.GetTopology() - base := metadata.GetBaseMeta() - - if exist := utils.IsExist(packagePath); !exist { - return perrs.New("specified package not exists") - } - - insts, err := instancesToPatch(topo, opt) - if err != nil { - return err - } - if err := checkPackage(m.bindVersion, m.specManager, clusterName, insts[0].ComponentName(), insts[0].OS(), insts[0].Arch(), packagePath); err != nil { - return err - } - - var replacePackageTasks []task.Task - for _, inst := range insts { - deployDir := spec.Abs(base.User, inst.DeployDir()) - tb := task.NewBuilder() - tb.BackupComponent(inst.ComponentName(), base.Version, inst.GetHost(), deployDir). - InstallPackage(packagePath, inst.GetHost(), deployDir) - replacePackageTasks = append(replacePackageTasks, tb.Build()) - } - - tlsCfg, err := topo.TLSConfig(m.specManager.Path(clusterName, spec.TLSCertKeyDir)) - if err != nil { - return err - } - t := task.NewBuilder(). - SSHKeySet( - m.specManager.Path(clusterName, "ssh", "id_rsa"), - m.specManager.Path(clusterName, "ssh", "id_rsa.pub")). - ClusterSSH(topo, base.User, opt.SSHTimeout, opt.SSHType, topo.BaseTopo().GlobalOptions.SSHType). - Parallel(false, replacePackageTasks...). - Func("UpgradeCluster", func(ctx *task.Context) error { - return operator.Upgrade(ctx, topo, opt, tlsCfg) - }). - Build() - - if err := t.Execute(task.NewContext()); err != nil { - if errorx.Cast(err) != nil { - // FIXME: Map possible task errors and give suggestions. - return err - } - return perrs.Trace(err) - } - - if overwrite { - if err := overwritePatch(m.specManager, clusterName, insts[0].ComponentName(), packagePath); err != nil { - return err - } - } - - return nil -} - -// ScaleOutOptions contains the options for scale out. -type ScaleOutOptions struct { - User string // username to login to the SSH server - SkipCreateUser bool // don't create user - IdentityFile string // path to the private key file - UsePassword bool // use password instead of identity file for ssh connection - NoLabels bool // don't check labels for TiKV instance -} - -// DeployOptions contains the options for scale out. -// TODO: merge ScaleOutOptions, should check config too when scale out. -type DeployOptions struct { - User string // username to login to the SSH server - SkipCreateUser bool // don't create the user - IdentityFile string // path to the private key file - UsePassword bool // use password instead of identity file for ssh connection - IgnoreConfigCheck bool // ignore config check result - NoLabels bool // don't check labels for TiKV instance -} - -// DeployerInstance is a instance can deploy to a target deploy directory. -type DeployerInstance interface { - Deploy(b *task.Builder, srcPath string, deployDir string, version string, clusterName string, clusterVersion string) -} - -// Deploy a cluster. 
-func (m *Manager) Deploy( - clusterName string, - clusterVersion string, - topoFile string, - opt DeployOptions, - afterDeploy func(b *task.Builder, newPart spec.Topology), - skipConfirm bool, - optTimeout uint64, - sshTimeout uint64, - sshType executor.SSHType, -) error { - if err := clusterutil.ValidateClusterNameOrError(clusterName); err != nil { - return err - } - - exist, err := m.specManager.Exist(clusterName) - if err != nil { - return perrs.AddStack(err) - } - - if exist { - // FIXME: When change to use args, the suggestion text need to be updatem. - return errDeployNameDuplicate. - New("Cluster name '%s' is duplicated", clusterName). - WithProperty(cliutil.SuggestionFromFormat("Please specify another cluster name")) - } - - metadata := m.specManager.NewMetadata() - topo := metadata.GetTopology() - - if err := spec.ParseTopologyYaml(topoFile, topo); err != nil { - return err - } - - spec.ExpandRelativeDir(topo) - - base := topo.BaseTopo() - if sshType != "" { - base.GlobalOptions.SSHType = sshType - } - - if topo, ok := topo.(*spec.Specification); ok && !opt.NoLabels { - // Check if TiKV's label set correctly - lbs, err := topo.LocationLabels() - if err != nil { - return err - } - if err := spec.CheckTiKVLabels(lbs, topo); err != nil { - return perrs.Errorf("check TiKV label failed, please fix that before continue:\n%s", err) - } - } - - clusterList, err := m.specManager.GetAllClusters() - if err != nil { - return err - } - if err := spec.CheckClusterPortConflict(clusterList, clusterName, topo); err != nil { - return err - } - if err := spec.CheckClusterDirConflict(clusterList, clusterName, topo); err != nil { - return err - } - - if !skipConfirm { - if err := m.confirmTopology(clusterName, clusterVersion, topo, set.NewStringSet()); err != nil { - return err - } - } - - var sshConnProps *cliutil.SSHConnectionProps = &cliutil.SSHConnectionProps{} - if sshType != executor.SSHTypeNone { - var err error - if sshConnProps, err = cliutil.ReadIdentityFileOrPassword(opt.IdentityFile, opt.UsePassword); err != nil { - return err - } - } - - if err := os.MkdirAll(m.specManager.Path(clusterName), 0755); err != nil { - return errorx.InitializationFailed. - Wrap(err, "Failed to create cluster metadata directory '%s'", m.specManager.Path(clusterName)). 
- WithProperty(cliutil.SuggestionFromString("Please check file system permissions and try again.")) - } - - var ( - envInitTasks []*task.StepDisplay // tasks which are used to initialize environment - downloadCompTasks []*task.StepDisplay // tasks which are used to download components - deployCompTasks []*task.StepDisplay // tasks which are used to copy components to remote host - ) - - // Initialize environment - uniqueHosts := make(map[string]hostInfo) // host -> ssh-port, os, arch - globalOptions := base.GlobalOptions - - // generate CA and client cert for TLS enabled cluster - var ca *crypto.CertificateAuthority - if globalOptions.TLSEnabled { - // generate CA - tlsPath := m.specManager.Path(clusterName, spec.TLSCertKeyDir) - if err := utils.CreateDir(tlsPath); err != nil { - return err - } - ca, err = genAndSaveClusterCA(clusterName, tlsPath) - if err != nil { - return err - } - - // generate client cert - if err = genAndSaveClientCert(ca, clusterName, tlsPath); err != nil { - return err - } - } - - var iterErr error // error when itering over instances - iterErr = nil - topo.IterInstance(func(inst spec.Instance) { - if _, found := uniqueHosts[inst.GetHost()]; !found { - // check for "imported" parameter, it can not be true when scaling out - if inst.IsImported() { - iterErr = errors.New( - "'imported' is set to 'true' for new instance, this is only used " + - "for instances imported from tidb-ansible and make no sense when " + - "deploying new instances, please delete the line or set it to 'false' for new instances") - return // skip the host to avoid issues - } - - uniqueHosts[inst.GetHost()] = hostInfo{ - ssh: inst.GetSSHPort(), - os: inst.OS(), - arch: inst.Arch(), - } - var dirs []string - for _, dir := range []string{globalOptions.DeployDir, globalOptions.LogDir} { - if dir == "" { - continue - } - dirs = append(dirs, spec.Abs(globalOptions.User, dir)) - } - // the default, relative path of data dir is under deploy dir - if strings.HasPrefix(globalOptions.DataDir, "/") { - dirs = append(dirs, globalOptions.DataDir) - } - t := task.NewBuilder(). - RootSSH( - inst.GetHost(), - inst.GetSSHPort(), - opt.User, - sshConnProps.Password, - sshConnProps.IdentityFile, - sshConnProps.IdentityFilePassphrase, - sshTimeout, - sshType, - globalOptions.SSHType, - ). - EnvInit(inst.GetHost(), globalOptions.User, globalOptions.Group, opt.SkipCreateUser || globalOptions.User == opt.User). - Mkdir(globalOptions.User, inst.GetHost(), dirs...). 
- BuildAsStep(fmt.Sprintf(" - Prepare %s:%d", inst.GetHost(), inst.GetSSHPort())) - envInitTasks = append(envInitTasks, t) - } - }) - - if iterErr != nil { - return iterErr - } - - // Download missing component - downloadCompTasks = BuildDownloadCompTasks(clusterVersion, topo, m.bindVersion) - - // Deploy components to remote - topo.IterInstance(func(inst spec.Instance) { - version := m.bindVersion(inst.ComponentName(), clusterVersion) - deployDir := spec.Abs(globalOptions.User, inst.DeployDir()) - // data dir would be empty for components which don't need it - dataDirs := spec.MultiDirAbs(globalOptions.User, inst.DataDir()) - // log dir will always be with values, but might not used by the component - logDir := spec.Abs(globalOptions.User, inst.LogDir()) - // Deploy component - // prepare deployment server - deployDirs := []string{ - deployDir, logDir, - filepath.Join(deployDir, "bin"), - filepath.Join(deployDir, "conf"), - filepath.Join(deployDir, "scripts"), - } - if globalOptions.TLSEnabled { - deployDirs = append(deployDirs, filepath.Join(deployDir, "tls")) - } - t := task.NewBuilder(). - UserSSH(inst.GetHost(), inst.GetSSHPort(), globalOptions.User, sshTimeout, sshType, globalOptions.SSHType). - Mkdir(globalOptions.User, inst.GetHost(), deployDirs...). - Mkdir(globalOptions.User, inst.GetHost(), dataDirs...) - - if deployerInstance, ok := inst.(DeployerInstance); ok { - deployerInstance.Deploy(t, "", deployDir, version, clusterName, clusterVersion) - } else { - // copy dependency component if needed - switch inst.ComponentName() { - case spec.ComponentTiSpark: - env := environment.GlobalEnv() - var sparkVer pkgver.Version - if sparkVer, _, iterErr = env.V1Repository().LatestStableVersion(spec.ComponentSpark, false); iterErr != nil { - return - } - t = t.DeploySpark(inst, sparkVer.String(), "" /* default srcPath */, deployDir) - default: - t = t.CopyComponent( - inst.ComponentName(), - inst.OS(), - inst.Arch(), - version, - "", // use default srcPath - inst.GetHost(), - deployDir, - ) - } - } - - // generate and transfer tls cert for instance - if globalOptions.TLSEnabled { - t = t.TLSCert(inst, ca, meta.DirPaths{ - Deploy: deployDir, - Cache: m.specManager.Path(clusterName, spec.TempConfigPath), - }) - } - - // generate configs for the component - t = t.InitConfig( - clusterName, - clusterVersion, - m.specManager, - inst, - globalOptions.User, - opt.IgnoreConfigCheck, - meta.DirPaths{ - Deploy: deployDir, - Data: dataDirs, - Log: logDir, - Cache: m.specManager.Path(clusterName, spec.TempConfigPath), - }, - ) - - deployCompTasks = append(deployCompTasks, - t.BuildAsStep(fmt.Sprintf(" - Copy %s -> %s", inst.ComponentName(), inst.GetHost())), - ) - }) - - if iterErr != nil { - return iterErr - } - - // Deploy monitor relevant components to remote - dlTasks, dpTasks := buildMonitoredDeployTask( - m.bindVersion, - m.specManager, - clusterName, - uniqueHosts, - globalOptions, - topo.GetMonitoredOptions(), - clusterVersion, - sshTimeout, - sshType, - ) - downloadCompTasks = append(downloadCompTasks, dlTasks...) - deployCompTasks = append(deployCompTasks, dpTasks...) - - builder := task.NewBuilder(). - Step("+ Generate SSH keys", - task.NewBuilder().SSHKeyGen(m.specManager.Path(clusterName, "ssh", "id_rsa")).Build()). - ParallelStep("+ Download TiDB components", false, downloadCompTasks...). - ParallelStep("+ Initialize target host environments", false, envInitTasks...). - ParallelStep("+ Copy files", false, deployCompTasks...) 
- - if afterDeploy != nil { - afterDeploy(builder, topo) - } - - t := builder.Build() - - if err := t.Execute(task.NewContext()); err != nil { - if errorx.Cast(err) != nil { - // FIXME: Map possible task errors and give suggestions. - return err - } - return perrs.AddStack(err) - } - - metadata.SetUser(globalOptions.User) - metadata.SetVersion(clusterVersion) - err = m.specManager.SaveMeta(clusterName, metadata) - - if err != nil { - return perrs.AddStack(err) - } - - hint := color.New(color.Bold).Sprintf("%s start %s", cliutil.OsArgs0(), clusterName) - log.Infof("Deployed cluster `%s` successfully, you can start the cluster via `%s`", clusterName, hint) - return nil -} - -// ScaleIn the cluster. -func (m *Manager) ScaleIn( - clusterName string, - skipConfirm bool, - optTimeout uint64, - sshTimeout uint64, - sshType executor.SSHType, - force bool, - nodes []string, - scale func(builer *task.Builder, metadata spec.Metadata, tlsCfg *tls.Config), -) error { - if !skipConfirm { - if err := cliutil.PromptForConfirmOrAbortError( - "This operation will delete the %s nodes in `%s` and all their data.\nDo you want to continue? [y/N]:", - strings.Join(nodes, ","), - color.HiYellowString(clusterName)); err != nil { - return err - } - - if force { - if err := cliutil.PromptForConfirmOrAbortError( - "Forcing scale in is unsafe and may result in data lost for stateful components.\nDo you want to continue? [y/N]:", - ); err != nil { - return err - } - } - - log.Infof("Scale-in nodes...") - } - - metadata, err := m.meta(clusterName) - if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) && - !errors.Is(perrs.Cause(err), spec.ErrMultipleTiSparkMaster) && - !errors.Is(perrs.Cause(err), spec.ErrMultipleTisparkWorker) { - // ignore conflict check error, node may be deployed by former version - // that lack of some certain conflict checks - return perrs.AddStack(err) - } - - topo := metadata.GetTopology() - base := metadata.GetBaseMeta() - - // Regenerate configuration - var regenConfigTasks []task.Task - hasImported := false - deletedNodes := set.NewStringSet(nodes...) - topo.IterInstance(func(instance spec.Instance) { - if deletedNodes.Exist(instance.ID()) { - return - } - deployDir := spec.Abs(base.User, instance.DeployDir()) - // data dir would be empty for components which don't need it - dataDirs := spec.MultiDirAbs(base.User, instance.DataDir()) - // log dir will always be with values, but might not used by the component - logDir := spec.Abs(base.User, instance.LogDir()) - - // Download and copy the latest component to remote if the cluster is imported from Ansible - tb := task.NewBuilder() - if instance.IsImported() { - switch compName := instance.ComponentName(); compName { - case spec.ComponentGrafana, spec.ComponentPrometheus, spec.ComponentAlertmanager: - version := m.bindVersion(compName, base.Version) - tb.Download(compName, instance.OS(), instance.Arch(), version). 
- CopyComponent( - compName, - instance.OS(), - instance.Arch(), - version, - "", // use default srcPath - instance.GetHost(), - deployDir, - ) - } - hasImported = true - } - - t := tb.InitConfig(clusterName, - base.Version, - m.specManager, - instance, - base.User, - true, // always ignore config check result in scale in - meta.DirPaths{ - Deploy: deployDir, - Data: dataDirs, - Log: logDir, - Cache: m.specManager.Path(clusterName, spec.TempConfigPath), - }, - ).Build() - regenConfigTasks = append(regenConfigTasks, t) - }) - - // handle dir scheme changes - if hasImported { - if err := spec.HandleImportPathMigration(clusterName); err != nil { - return err - } - } - - tlsCfg, err := topo.TLSConfig(m.specManager.Path(clusterName, spec.TLSCertKeyDir)) - if err != nil { - return err - } - - b := task.NewBuilder(). - SSHKeySet( - m.specManager.Path(clusterName, "ssh", "id_rsa"), - m.specManager.Path(clusterName, "ssh", "id_rsa.pub")). - ClusterSSH(topo, base.User, sshTimeout, sshType, metadata.GetTopology().BaseTopo().GlobalOptions.SSHType) - - scale(b, metadata, tlsCfg) - - t := b.Parallel(force, regenConfigTasks...).Parallel(force, buildDynReloadProm(metadata.GetTopology())...).Build() - - if err := t.Execute(task.NewContext()); err != nil { - if errorx.Cast(err) != nil { - // FIXME: Map possible task errors and give suggestions. - return err - } - return perrs.Trace(err) - } - - log.Infof("Scaled cluster `%s` in successfully", clusterName) - - return nil -} - -// ScaleOut scale out the cluster. -func (m *Manager) ScaleOut( - clusterName string, - topoFile string, - afterDeploy func(b *task.Builder, newPart spec.Topology), - final func(b *task.Builder, name string, meta spec.Metadata), - opt ScaleOutOptions, - skipConfirm bool, - optTimeout uint64, - sshTimeout uint64, - sshType executor.SSHType, -) error { - metadata, err := m.meta(clusterName) - // allow specific validation errors so that user can recover a broken - // cluster if it is somehow in a bad state. - if err != nil && - !errors.Is(perrs.Cause(err), spec.ErrNoTiSparkMaster) { - return perrs.AddStack(err) - } - - topo := metadata.GetTopology() - base := metadata.GetBaseMeta() - // Inherit existing global configuration. We must assign the inherited values before unmarshalling - // because some default value rely on the global options and monitored options. - newPart := topo.NewPart() - - // The no tispark master error is ignored, as if the tispark master is removed from the topology - // file for some reason (manual edit, for example), it is still possible to scale-out it to make - // the whole topology back to normal state. 
- if err := spec.ParseTopologyYaml(topoFile, newPart); err != nil && - !errors.Is(perrs.Cause(err), spec.ErrNoTiSparkMaster) { - return err - } - - if err := validateNewTopo(newPart); err != nil { - return err - } - - // Abort scale out operation if the merged topology is invalid - mergedTopo := topo.MergeTopo(newPart) - if err := mergedTopo.Validate(); err != nil { - return err - } - spec.ExpandRelativeDir(mergedTopo) - - if topo, ok := topo.(*spec.Specification); ok && !opt.NoLabels { - // Check if TiKV's label set correctly - pdList := topo.BaseTopo().MasterList - tlsCfg, err := topo.TLSConfig(m.specManager.Path(clusterName, spec.TLSCertKeyDir)) - if err != nil { - return err - } - pdClient := api.NewPDClient(pdList, 10*time.Second, tlsCfg) - lbs, err := pdClient.GetLocationLabels() - if err != nil { - return err - } - if err := spec.CheckTiKVLabels(lbs, mergedTopo.(*spec.Specification)); err != nil { - return perrs.Errorf("check TiKV label failed, please fix that before continue:\n%s", err) - } - } - - clusterList, err := m.specManager.GetAllClusters() - if err != nil { - return err - } - if err := spec.CheckClusterPortConflict(clusterList, clusterName, mergedTopo); err != nil { - return err - } - if err := spec.CheckClusterDirConflict(clusterList, clusterName, mergedTopo); err != nil { - return err - } - - patchedComponents := set.NewStringSet() - newPart.IterInstance(func(instance spec.Instance) { - if utils.IsExist(m.specManager.Path(clusterName, spec.PatchDirName, instance.ComponentName()+".tar.gz")) { - patchedComponents.Insert(instance.ComponentName()) - } - }) - - if !skipConfirm { - // patchedComponents are components that have been patched and overwrited - if err := m.confirmTopology(clusterName, base.Version, newPart, patchedComponents); err != nil { - return err - } - } - - var sshConnProps *cliutil.SSHConnectionProps = &cliutil.SSHConnectionProps{} - if sshType != executor.SSHTypeNone { - var err error - if sshConnProps, err = cliutil.ReadIdentityFileOrPassword(opt.IdentityFile, opt.UsePassword); err != nil { - return err - } - } - - // Build the scale out tasks - t, err := buildScaleOutTask( - m, clusterName, metadata, mergedTopo, opt, sshConnProps, newPart, - patchedComponents, optTimeout, sshTimeout, sshType, afterDeploy, final) - if err != nil { - return err - } - - if err := t.Execute(task.NewContext()); err != nil { - if errorx.Cast(err) != nil { - // FIXME: Map possible task errors and give suggestions. - return err - } - return perrs.Trace(err) - } - - log.Infof("Scaled cluster `%s` out successfully", clusterName) - - return nil -} - -// DestroyTombstone destroy and remove instances that is in tombstone state -func (m *Manager) DestroyTombstone( - clusterName string, - gOpt operator.Options, - skipConfirm bool, -) error { - var ( - sshTimeout = gOpt.SSHTimeout - sshType = gOpt.SSHType - ) - - metadata, err := m.meta(clusterName) - // allow specific validation errors so that user can recover a broken - // cluster if it is somehow in a bad state. - if err != nil && - !errors.Is(perrs.Cause(err), spec.ErrNoTiSparkMaster) { - return perrs.AddStack(err) - } - - topo := metadata.GetTopology() - base := metadata.GetBaseMeta() - - clusterMeta := metadata.(*spec.ClusterMeta) - cluster := clusterMeta.Topology - - if !operator.NeedCheckTombstone(cluster) { - return nil - } - - tlsCfg, err := topo.TLSConfig(m.specManager.Path(clusterName, spec.TLSCertKeyDir)) - if err != nil { - return err - } - - b := task.NewBuilder(). 
- SSHKeySet( - m.specManager.Path(clusterName, "ssh", "id_rsa"), - m.specManager.Path(clusterName, "ssh", "id_rsa.pub")). - ClusterSSH(topo, base.User, sshTimeout, sshType, metadata.GetTopology().BaseTopo().GlobalOptions.SSHType) - - var nodes []string - b. - Func("FindTomestoneNodes", func(ctx *task.Context) (err error) { - nodes, err = operator.DestroyTombstone(ctx, cluster, true /* returnNodesOnly */, gOpt, tlsCfg) - if !skipConfirm { - err = cliutil.PromptForConfirmOrAbortError( - color.HiYellowString(fmt.Sprintf("Will destroy these nodes: %v\nDo you confirm this action? [y/N]:", nodes)), - ) - if err != nil { - return err - } - } - log.Infof("Start destroy Tombstone nodes: %v ...", nodes) - return err - }). - ClusterOperate(cluster, operator.DestroyTombstoneOperation, gOpt, tlsCfg). - UpdateMeta(clusterName, clusterMeta, nodes). - UpdateTopology(clusterName, m.specManager.Path(clusterName), clusterMeta, nodes) - - var regenConfigTasks []task.Task - deletedNodes := set.NewStringSet(nodes...) - topo.IterInstance(func(instance spec.Instance) { - if deletedNodes.Exist(instance.ID()) { - return - } - deployDir := spec.Abs(base.User, instance.DeployDir()) - // data dir would be empty for components which don't need it - dataDirs := spec.MultiDirAbs(base.User, instance.DataDir()) - // log dir will always be with values, but might not used by the component - logDir := spec.Abs(base.User, instance.LogDir()) - - // Download and copy the latest component to remote if the cluster is imported from Ansible - tb := task.NewBuilder() - if instance.IsImported() { - switch compName := instance.ComponentName(); compName { - case spec.ComponentGrafana, spec.ComponentPrometheus, spec.ComponentAlertmanager: - version := m.bindVersion(compName, base.Version) - tb.Download(compName, instance.OS(), instance.Arch(), version). - CopyComponent( - compName, - instance.OS(), - instance.Arch(), - version, - "", // use default srcPath - instance.GetHost(), - deployDir, - ) - } - } - - t := tb.InitConfig(clusterName, - base.Version, - m.specManager, - instance, - base.User, - true, // always ignore config check result in scale in - meta.DirPaths{ - Deploy: deployDir, - Data: dataDirs, - Log: logDir, - Cache: m.specManager.Path(clusterName, spec.TempConfigPath), - }, - ).Build() - regenConfigTasks = append(regenConfigTasks, t) - }) - - t := b.Parallel(true, regenConfigTasks...).Parallel(true, buildDynReloadProm(metadata.GetTopology())...).Build() - if err := t.Execute(task.NewContext()); err != nil { - if errorx.Cast(err) != nil { - // FIXME: Map possible task errors and give suggestions. - return err - } - return perrs.Trace(err) - } - - log.Infof("Destroy success") - - return nil -} - -func (m *Manager) meta(name string) (metadata spec.Metadata, err error) { - exist, err := m.specManager.Exist(name) - if err != nil { - return nil, perrs.AddStack(err) - } - - if !exist { - return nil, perrs.Errorf("%s cluster `%s` not exists", m.sysName, name) - } - - metadata = m.specManager.NewMetadata() - err = m.specManager.Metadata(name, metadata) - if err != nil { - return metadata, perrs.AddStack(err) - } - - return metadata, nil -} - -// 1. Write Topology to a temporary file. -// 2. Open file in editor. -// 3. Check and update Topology. -// 4. Save meta file. 
-func (m *Manager) editTopo(origTopo spec.Topology, data []byte, skipConfirm bool) (spec.Topology, error) { - file, err := ioutil.TempFile(os.TempDir(), "*") - if err != nil { - return nil, perrs.AddStack(err) - } - - name := file.Name() - - _, err = io.Copy(file, bytes.NewReader(data)) - if err != nil { - return nil, perrs.AddStack(err) - } - - err = file.Close() - if err != nil { - return nil, perrs.AddStack(err) - } - - err = utils.OpenFileInEditor(name) - if err != nil { - return nil, perrs.AddStack(err) - } - - // Now user finish editing the file. - newData, err := ioutil.ReadFile(name) - if err != nil { - return nil, perrs.AddStack(err) - } - - newTopo := m.specManager.NewMetadata().GetTopology() - err = yaml.UnmarshalStrict(newData, newTopo) - if err != nil { - fmt.Print(color.RedString("New topology could not be saved: ")) - log.Infof("Failed to parse topology file: %v", err) - if !cliutil.PromptForConfirmNo("Do you want to continue editing? [Y/n]: ") { - return m.editTopo(origTopo, newData, skipConfirm) - } - log.Infof("Nothing changed.") - return nil, nil - } - - // report error if immutable field has been changed - if err := utils.ValidateSpecDiff(origTopo, newTopo); err != nil { - fmt.Print(color.RedString("New topology could not be saved: ")) - log.Errorf("%s", err) - if !cliutil.PromptForConfirmNo("Do you want to continue editing? [Y/n]: ") { - return m.editTopo(origTopo, newData, skipConfirm) - } - log.Infof("Nothing changed.") - return nil, nil - } - - origData, err := yaml.Marshal(origTopo) - if err != nil { - return nil, perrs.AddStack(err) - } - - if bytes.Equal(origData, newData) { - log.Infof("The file has nothing changed") - return nil, nil - } - - utils.ShowDiff(string(origData), string(newData), os.Stdout) - - if !skipConfirm { - if err := cliutil.PromptForConfirmOrAbortError( - color.HiYellowString("Please check change highlight above, do you want to apply the change? [y/N]:"), - ); err != nil { - return nil, err - } - } - - return newTopo, nil -} - -func formatInstanceStatus(status string) string { - lowercaseStatus := strings.ToLower(status) - - startsWith := func(prefixs ...string) bool { - for _, prefix := range prefixs { - if strings.HasPrefix(lowercaseStatus, prefix) { - return true - } - } - return false - } - - switch { - case startsWith("up|l"): // up|l, up|l|ui - return color.HiGreenString(status) - case startsWith("up"): - return color.GreenString(status) - case startsWith("down", "err"): // down, down|ui - return color.RedString(status) - case startsWith("tombstone", "disconnected"), strings.Contains(status, "offline"): - return color.YellowString(status) - default: - return status - } -} - -func versionCompare(curVersion, newVersion string) error { - // Can always upgrade to 'nightly' event the current version is 'nightly' - if newVersion == version.NightlyVersion { - return nil - } - - switch semver.Compare(curVersion, newVersion) { - case -1: - return nil - case 0, 1: - return perrs.Errorf("please specify a higher version than %s", curVersion) - default: - return perrs.Errorf("unreachable") - } -} - -type componentInfo struct { - component string - version string -} - -func instancesToPatch(topo spec.Topology, options operator.Options) ([]spec.Instance, error) { - roleFilter := set.NewStringSet(options.Roles...) - nodeFilter := set.NewStringSet(options.Nodes...) 
- components := topo.ComponentsByStartOrder() - components = operator.FilterComponent(components, roleFilter) - - instances := []spec.Instance{} - comps := []string{} - for _, com := range components { - insts := operator.FilterInstance(com.Instances(), nodeFilter) - if len(insts) > 0 { - comps = append(comps, com.Name()) - } - instances = append(instances, insts...) - } - if len(comps) > 1 { - return nil, fmt.Errorf("can't patch more than one component at once: %v", comps) - } - - if len(instances) == 0 { - return nil, fmt.Errorf("no instance found on specifid role(%v) and nodes(%v)", options.Roles, options.Nodes) - } - - return instances, nil -} - -func checkPackage(bindVersion spec.BindVersion, specManager *spec.SpecManager, clusterName, comp, nodeOS, arch, packagePath string) error { - metadata := specManager.NewMetadata() - if err := specManager.Metadata(clusterName, metadata); err != nil { - return err - } - - ver := bindVersion(comp, metadata.GetBaseMeta().Version) - repo, err := clusterutil.NewRepository(nodeOS, arch) - if err != nil { - return err - } - entry, err := repo.ComponentBinEntry(comp, ver) - if err != nil { - return err - } - - checksum, err := utils.Checksum(packagePath) - if err != nil { - return err - } - cacheDir := specManager.Path(clusterName, "cache", comp+"-"+checksum[:7]) - if err := os.MkdirAll(cacheDir, 0755); err != nil { - return err - } - if err := exec.Command("tar", "-xvf", packagePath, "-C", cacheDir).Run(); err != nil { - return err - } - - if exists := utils.IsExist(path.Join(cacheDir, entry)); !exists { - return fmt.Errorf("entry %s not found in package %s", entry, packagePath) - } - - return nil -} - -func overwritePatch(specManager *spec.SpecManager, clusterName, comp, packagePath string) error { - if err := os.MkdirAll(specManager.Path(clusterName, spec.PatchDirName), 0755); err != nil { - return err - } - - checksum, err := utils.Checksum(packagePath) - if err != nil { - return err - } - - tg := specManager.Path(clusterName, spec.PatchDirName, comp+"-"+checksum[:7]+".tar.gz") - if !utils.IsExist(tg) { - if err := utils.Copy(packagePath, tg); err != nil { - return err - } - } - - symlink := specManager.Path(clusterName, spec.PatchDirName, comp+".tar.gz") - if utils.IsSymExist(symlink) { - os.Remove(symlink) - } - return os.Symlink(tg, symlink) -} - -// validateNewTopo checks the new part of scale-out topology to make sure it's supported -func validateNewTopo(topo spec.Topology) (err error) { - topo.IterInstance(func(instance spec.Instance) { - // check for "imported" parameter, it can not be true when scaling out - if instance.IsImported() { - err = errors.New( - "'imported' is set to 'true' for new instance, this is only used " + - "for instances imported from tidb-ansible and make no sense when " + - "scaling out, please delete the line or set it to 'false' for new instances") - return - } - }) - return err -} - -func (m *Manager) confirmTopology(clusterName, version string, topo spec.Topology, patchedRoles set.StringSet) error { - log.Infof("Please confirm your topology:") - - cyan := color.New(color.FgCyan, color.Bold) - fmt.Printf("Cluster type: %s\n", cyan.Sprint(m.sysName)) - fmt.Printf("Cluster name: %s\n", cyan.Sprint(clusterName)) - fmt.Printf("Cluster version: %s\n", cyan.Sprint(version)) - if topo.BaseTopo().GlobalOptions.TLSEnabled { - fmt.Printf("TLS encryption: %s\n", cyan.Sprint("enabled")) - } - - clusterTable := [][]string{ - // Header - {"Type", "Host", "Ports", "OS/Arch", "Directories"}, - } - - topo.IterInstance(func(instance 
spec.Instance) { - comp := instance.ComponentName() - if patchedRoles.Exist(comp) { - comp += " (patched)" - } - clusterTable = append(clusterTable, []string{ - comp, - instance.GetHost(), - utils.JoinInt(instance.UsedPorts(), "/"), - cliutil.OsArch(instance.OS(), instance.Arch()), - strings.Join(instance.UsedDirs(), ","), - }) - }) - - cliutil.PrintTable(clusterTable, true) - - log.Warnf("Attention:") - log.Warnf(" 1. If the topology is not what you expected, check your yaml file.") - log.Warnf(" 2. Please confirm there is no port/directory conflicts in same host.") - if len(patchedRoles) != 0 { - log.Errorf(" 3. The component marked as `patched` has been replaced by previous patch commanm.") - } - - if spec, ok := topo.(*spec.Specification); ok { - if len(spec.TiSparkMasters) > 0 || len(spec.TiSparkWorkers) > 0 { - cyan := color.New(color.FgCyan, color.Bold) - msg := cyan.Sprint(`There are TiSpark nodes defined in the topology, please note that you'll need to manually install Java Runtime Environment (JRE) 8 on the host, otherwise the TiSpark nodes will fail to start. -You may read the OpenJDK doc for a reference: https://openjdk.java.net/install/ - `) - log.Warnf(msg) - } - } - - return cliutil.PromptForConfirmOrAbortError("Do you want to continue? [y/N]: ") -} - -// Dynamic reload Prometheus configuration -func buildDynReloadProm(topo spec.Topology) []task.Task { - monitor := spec.FindComponent(topo, spec.ComponentPrometheus) - if monitor == nil { - return nil - } - instances := monitor.Instances() - if len(instances) == 0 { - return nil - } - var dynReloadTasks []task.Task - for _, inst := range monitor.Instances() { - dynReloadTasks = append(dynReloadTasks, task.NewBuilder().SystemCtl(inst.GetHost(), inst.ServiceName(), "reload", true).Build()) - } - return dynReloadTasks -} - -func buildScaleOutTask( - m *Manager, - clusterName string, - metadata spec.Metadata, - mergedTopo spec.Topology, - opt ScaleOutOptions, - sshConnProps *cliutil.SSHConnectionProps, - newPart spec.Topology, - patchedComponents set.StringSet, - optTimeout uint64, - sshTimeout uint64, - sshType executor.SSHType, - afterDeploy func(b *task.Builder, newPart spec.Topology), - final func(b *task.Builder, name string, meta spec.Metadata), -) (task.Task, error) { - var ( - envInitTasks []task.Task // tasks which are used to initialize environment - downloadCompTasks []task.Task // tasks which are used to download components - deployCompTasks []task.Task // tasks which are used to copy components to remote host - refreshConfigTasks []task.Task // tasks which are used to refresh configuration - ) - - topo := metadata.GetTopology() - base := metadata.GetBaseMeta() - specManager := m.specManager - - tlsCfg, err := topo.TLSConfig(m.specManager.Path(clusterName, spec.TLSCertKeyDir)) - if err != nil { - return nil, err - } - - // Initialize the environments - initializedHosts := set.NewStringSet() - metadata.GetTopology().IterInstance(func(instance spec.Instance) { - initializedHosts.Insert(instance.GetHost()) - }) - // uninitializedHosts are hosts which haven't been initialized yet - uninitializedHosts := make(map[string]hostInfo) // host -> ssh-port, os, arch - newPart.IterInstance(func(instance spec.Instance) { - if host := instance.GetHost(); !initializedHosts.Exist(host) { - if _, found := uninitializedHosts[host]; found { - return - } - - uninitializedHosts[host] = hostInfo{ - ssh: instance.GetSSHPort(), - os: instance.OS(), - arch: instance.Arch(), - } - - var dirs []string - globalOptions := 
metadata.GetTopology().BaseTopo().GlobalOptions - for _, dir := range []string{globalOptions.DeployDir, globalOptions.DataDir, globalOptions.LogDir} { - for _, dirname := range strings.Split(dir, ",") { - if dirname == "" { - continue - } - dirs = append(dirs, spec.Abs(globalOptions.User, dirname)) - } - } - t := task.NewBuilder(). - RootSSH( - instance.GetHost(), - instance.GetSSHPort(), - opt.User, - sshConnProps.Password, - sshConnProps.IdentityFile, - sshConnProps.IdentityFilePassphrase, - sshTimeout, - sshType, - globalOptions.SSHType, - ). - EnvInit(instance.GetHost(), base.User, base.Group, opt.SkipCreateUser || globalOptions.User == opt.User). - Mkdir(globalOptions.User, instance.GetHost(), dirs...). - Build() - envInitTasks = append(envInitTasks, t) - } - }) - - // Download missing component - downloadCompTasks = convertStepDisplaysToTasks(BuildDownloadCompTasks(base.Version, newPart, m.bindVersion)) - - var iterErr error - // Deploy the new topology and refresh the configuration - newPart.IterInstance(func(inst spec.Instance) { - version := m.bindVersion(inst.ComponentName(), base.Version) - deployDir := spec.Abs(base.User, inst.DeployDir()) - // data dir would be empty for components which don't need it - dataDirs := spec.MultiDirAbs(base.User, inst.DataDir()) - // log dir will always be with values, but might not used by the component - logDir := spec.Abs(base.User, inst.LogDir()) - - deployDirs := []string{ - deployDir, logDir, - filepath.Join(deployDir, "bin"), - filepath.Join(deployDir, "conf"), - filepath.Join(deployDir, "scripts"), - } - if topo.BaseTopo().GlobalOptions.TLSEnabled { - deployDirs = append(deployDirs, filepath.Join(deployDir, "tls")) - } - // Deploy component - tb := task.NewBuilder(). - UserSSH(inst.GetHost(), inst.GetSSHPort(), base.User, sshTimeout, sshType, topo.BaseTopo().GlobalOptions.SSHType). - Mkdir(base.User, inst.GetHost(), deployDirs...). - Mkdir(base.User, inst.GetHost(), dataDirs...) 
- - srcPath := "" - if patchedComponents.Exist(inst.ComponentName()) { - srcPath = specManager.Path(clusterName, spec.PatchDirName, inst.ComponentName()+".tar.gz") - } - - if deployerInstance, ok := inst.(DeployerInstance); ok { - deployerInstance.Deploy(tb, srcPath, deployDir, version, clusterName, version) - } else { - // copy dependency component if needed - switch inst.ComponentName() { - case spec.ComponentTiSpark: - env := environment.GlobalEnv() - var sparkVer pkgver.Version - if sparkVer, _, iterErr = env.V1Repository().LatestStableVersion(spec.ComponentSpark, false); iterErr != nil { - return - } - tb = tb.DeploySpark(inst, sparkVer.String(), srcPath, deployDir) - default: - tb.CopyComponent( - inst.ComponentName(), - inst.OS(), - inst.Arch(), - version, - srcPath, - inst.GetHost(), - deployDir, - ) - } - } - // generate and transfer tls cert for instance - if topo.BaseTopo().GlobalOptions.TLSEnabled { - ca, err := crypto.ReadCA( - clusterName, - m.specManager.Path(clusterName, spec.TLSCertKeyDir, spec.TLSCACert), - m.specManager.Path(clusterName, spec.TLSCertKeyDir, spec.TLSCAKey), - ) - if err != nil { - iterErr = err - return - } - tb = tb.TLSCert(inst, ca, meta.DirPaths{ - Deploy: deployDir, - Cache: m.specManager.Path(clusterName, spec.TempConfigPath), - }) - } - - t := tb.ScaleConfig(clusterName, - base.Version, - m.specManager, - topo, - inst, - base.User, - meta.DirPaths{ - Deploy: deployDir, - Data: dataDirs, - Log: logDir, - }, - ).Build() - deployCompTasks = append(deployCompTasks, t) - }) - if iterErr != nil { - return nil, iterErr - } - - hasImported := false - - mergedTopo.IterInstance(func(inst spec.Instance) { - deployDir := spec.Abs(base.User, inst.DeployDir()) - // data dir would be empty for components which don't need it - dataDirs := spec.MultiDirAbs(base.User, inst.DataDir()) - // log dir will always be with values, but might not used by the component - logDir := spec.Abs(base.User, inst.LogDir()) - - // Download and copy the latest component to remote if the cluster is imported from Ansible - tb := task.NewBuilder() - if inst.IsImported() { - switch compName := inst.ComponentName(); compName { - case spec.ComponentGrafana, spec.ComponentPrometheus, spec.ComponentAlertmanager: - version := m.bindVersion(compName, base.Version) - tb.Download(compName, inst.OS(), inst.Arch(), version). - CopyComponent(compName, inst.OS(), inst.Arch(), version, "", inst.GetHost(), deployDir) - } - hasImported = true - } - - // Refresh all configuration - t := tb.InitConfig(clusterName, - base.Version, - m.specManager, - inst, - base.User, - true, // always ignore config check result in scale out - meta.DirPaths{ - Deploy: deployDir, - Data: dataDirs, - Log: logDir, - Cache: specManager.Path(clusterName, spec.TempConfigPath), - }, - ).Build() - refreshConfigTasks = append(refreshConfigTasks, t) - }) - - // handle dir scheme changes - if hasImported { - if err := spec.HandleImportPathMigration(clusterName); err != nil { - return task.NewBuilder().Build(), err - } - } - - // Deploy monitor relevant components to remote - dlTasks, dpTasks := buildMonitoredDeployTask( - m.bindVersion, - specManager, - clusterName, - uninitializedHosts, - topo.BaseTopo().GlobalOptions, - topo.BaseTopo().MonitoredOptions, - base.Version, - sshTimeout, - sshType, - ) - downloadCompTasks = append(downloadCompTasks, convertStepDisplaysToTasks(dlTasks)...) - deployCompTasks = append(deployCompTasks, convertStepDisplaysToTasks(dpTasks)...) - - builder := task.NewBuilder(). 
- SSHKeySet( - specManager.Path(clusterName, "ssh", "id_rsa"), - specManager.Path(clusterName, "ssh", "id_rsa.pub")). - Parallel(false, downloadCompTasks...). - Parallel(false, envInitTasks...). - ClusterSSH(topo, base.User, sshTimeout, sshType, topo.BaseTopo().GlobalOptions.SSHType). - Parallel(false, deployCompTasks...) - - if afterDeploy != nil { - afterDeploy(builder, newPart) - } - - builder. - ClusterSSH(newPart, base.User, sshTimeout, sshType, topo.BaseTopo().GlobalOptions.SSHType). - Func("Save meta", func(_ *task.Context) error { - metadata.SetTopology(mergedTopo) - return m.specManager.SaveMeta(clusterName, metadata) - }). - Func("StartCluster", func(ctx *task.Context) error { - return operator.Start(ctx, newPart, operator.Options{OptTimeout: optTimeout}, tlsCfg) - }). - Parallel(false, refreshConfigTasks...). - Parallel(false, buildDynReloadProm(metadata.GetTopology())...) - - if final != nil { - final(builder, clusterName, metadata) - } - - return builder.Build(), nil -} - -type hostInfo struct { - ssh int // ssh port of host - os string // operating system - arch string // cpu architecture - // vendor string -} - -// Deprecated -func convertStepDisplaysToTasks(t []*task.StepDisplay) []task.Task { - tasks := make([]task.Task, 0, len(t)) - for _, sd := range t { - tasks = append(tasks, sd) - } - return tasks -} - -func buildMonitoredDeployTask( - bindVersion spec.BindVersion, - specManager *spec.SpecManager, - clusterName string, - uniqueHosts map[string]hostInfo, // host -> ssh-port, os, arch - globalOptions *spec.GlobalOptions, - monitoredOptions *spec.MonitoredOptions, - version string, - sshTimeout uint64, - sshType executor.SSHType, -) (downloadCompTasks []*task.StepDisplay, deployCompTasks []*task.StepDisplay) { - if monitoredOptions == nil { - return - } - - uniqueCompOSArch := make(map[string]struct{}) // comp-os-arch -> {} - // monitoring agents - for _, comp := range []string{spec.ComponentNodeExporter, spec.ComponentBlackboxExporter} { - version := bindVersion(comp, version) - - for host, info := range uniqueHosts { - // populate unique os/arch set - key := fmt.Sprintf("%s-%s-%s", comp, info.os, info.arch) - if _, found := uniqueCompOSArch[key]; !found { - uniqueCompOSArch[key] = struct{}{} - downloadCompTasks = append(downloadCompTasks, task.NewBuilder(). - Download(comp, info.os, info.arch, version). - BuildAsStep(fmt.Sprintf(" - Download %s:%s (%s/%s)", comp, version, info.os, info.arch))) - } - - deployDir := spec.Abs(globalOptions.User, monitoredOptions.DeployDir) - // data dir would be empty for components which don't need it - dataDir := monitoredOptions.DataDir - // the default data_dir is relative to deploy_dir - if dataDir != "" && !strings.HasPrefix(dataDir, "/") { - dataDir = filepath.Join(deployDir, dataDir) - } - // log dir will always be with values, but might not used by the component - logDir := spec.Abs(globalOptions.User, monitoredOptions.LogDir) - // Deploy component - t := task.NewBuilder(). - UserSSH(host, info.ssh, globalOptions.User, sshTimeout, sshType, globalOptions.SSHType). - Mkdir(globalOptions.User, host, - deployDir, dataDir, logDir, - filepath.Join(deployDir, "bin"), - filepath.Join(deployDir, "conf"), - filepath.Join(deployDir, "scripts")). - CopyComponent( - comp, - info.os, - info.arch, - version, - "", - host, - deployDir, - ). 
- MonitoredConfig( - clusterName, - comp, - host, - globalOptions.ResourceControl, - monitoredOptions, - globalOptions.User, - meta.DirPaths{ - Deploy: deployDir, - Data: []string{dataDir}, - Log: logDir, - Cache: specManager.Path(clusterName, spec.TempConfigPath), - }, - ). - BuildAsStep(fmt.Sprintf(" - Copy %s -> %s", comp, host)) - deployCompTasks = append(deployCompTasks, t) - } - } - return -} - -func refreshMonitoredConfigTask( - specManager *spec.SpecManager, - clusterName string, - uniqueHosts map[string]hostInfo, // host -> ssh-port, os, arch - globalOptions spec.GlobalOptions, - monitoredOptions *spec.MonitoredOptions, - sshTimeout uint64, - sshType executor.SSHType, -) []*task.StepDisplay { - if monitoredOptions == nil { - return nil - } - - tasks := []*task.StepDisplay{} - // monitoring agents - for _, comp := range []string{spec.ComponentNodeExporter, spec.ComponentBlackboxExporter} { - for host, info := range uniqueHosts { - deployDir := spec.Abs(globalOptions.User, monitoredOptions.DeployDir) - // data dir would be empty for components which don't need it - dataDir := monitoredOptions.DataDir - // the default data_dir is relative to deploy_dir - if dataDir != "" && !strings.HasPrefix(dataDir, "/") { - dataDir = filepath.Join(deployDir, dataDir) - } - // log dir will always be with values, but might not used by the component - logDir := spec.Abs(globalOptions.User, monitoredOptions.LogDir) - // Generate configs - t := task.NewBuilder(). - UserSSH(host, info.ssh, globalOptions.User, sshTimeout, sshType, globalOptions.SSHType). - MonitoredConfig( - clusterName, - comp, - host, - globalOptions.ResourceControl, - monitoredOptions, - globalOptions.User, - meta.DirPaths{ - Deploy: deployDir, - Data: []string{dataDir}, - Log: logDir, - Cache: specManager.Path(clusterName, spec.TempConfigPath), - }, - ). 
- BuildAsStep(fmt.Sprintf(" - Refresh config %s -> %s", comp, host)) - tasks = append(tasks, t) - } - } - return tasks -} - -func genAndSaveClusterCA(clusterName, tlsPath string) (*crypto.CertificateAuthority, error) { - ca, err := crypto.NewCA(clusterName) - if err != nil { - return nil, err - } - - // save CA private key - if err := file.SaveFileWithBackup(filepath.Join(tlsPath, spec.TLSCAKey), ca.Key.Pem(), ""); err != nil { - return nil, perrs.Annotatef(err, "cannot save CA private key for %s", clusterName) - } - - // save CA certificate - if err := file.SaveFileWithBackup( - filepath.Join(tlsPath, spec.TLSCACert), - pem.EncodeToMemory(&pem.Block{ - Type: "CERTIFICATE", - Bytes: ca.Cert.Raw, - }), ""); err != nil { - return nil, perrs.Annotatef(err, "cannot save CA certificate for %s", clusterName) - } - - return ca, nil -} - -func genAndSaveClientCert(ca *crypto.CertificateAuthority, clusterName, tlsPath string) error { - privKey, err := crypto.NewKeyPair(crypto.KeyTypeRSA, crypto.KeySchemeRSASSAPSSSHA256) - if err != nil { - return perrs.AddStack(err) - } - - // save client private key - if err := file.SaveFileWithBackup(filepath.Join(tlsPath, spec.TLSClientKey), privKey.Pem(), ""); err != nil { - return perrs.Annotatef(err, "cannot save client private key for %s", clusterName) - } - - csr, err := privKey.CSR( - "tiup-cluster-client", - fmt.Sprintf("%s-client", clusterName), - []string{}, []string{}, - ) - if err != nil { - return perrs.Annotatef(err, "cannot generate CSR of client certificate for %s", clusterName) - } - cert, err := ca.Sign(csr) - if err != nil { - return perrs.Annotatef(err, "cannot sign client certificate for %s", clusterName) - } - - // save client certificate - if err := file.SaveFileWithBackup( - filepath.Join(tlsPath, spec.TLSClientCert), - pem.EncodeToMemory(&pem.Block{ - Type: "CERTIFICATE", - Bytes: cert, - }), ""); err != nil { - return perrs.Annotatef(err, "cannot save client PEM certificate for %s", clusterName) - } - - // save pfx format certificate - clientCert, err := x509.ParseCertificate(cert) - if err != nil { - return perrs.Annotatef(err, "cannot decode signed client certificate for %s", clusterName) - } - pfxData, err := privKey.PKCS12(clientCert, ca) - if err != nil { - return perrs.Annotatef(err, "cannot encode client certificate to PKCS#12 format for %s", clusterName) - } - if err := file.SaveFileWithBackup( - filepath.Join(tlsPath, spec.PFXClientCert), - pfxData, - ""); err != nil { - return perrs.Annotatef(err, "cannot save client PKCS#12 certificate for %s", clusterName) - } - - return nil -} diff --git a/pkg/cluster/manager/basic.go b/pkg/cluster/manager/basic.go new file mode 100644 index 0000000000..0ab60208e4 --- /dev/null +++ b/pkg/cluster/manager/basic.go @@ -0,0 +1,179 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
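The removed Deploy, ScaleIn, ScaleOut and DestroyTombstone signatures above each carried optTimeout, sshTimeout and sshType as separate parameters; the rewritten manager code in the new files that follow accepts a single operator.Options value (gOpt) instead. A minimal sketch of such an options value, assuming the OptTimeout, SSHTimeout and SSHType fields referenced elsewhere in this diff and an executor.SSHTypeBuiltin constant that does not appear here:

package main

import (
	"github.com/pingcap/tiup/pkg/cluster/executor"
	operator "github.com/pingcap/tiup/pkg/cluster/operation"
)

func main() {
	// One operator.Options value now replaces the separate timeout/SSH parameters
	// that the deleted function signatures threaded through every call.
	gOpt := operator.Options{
		OptTimeout: 120,                     // operation timeout in seconds; illustrative value
		SSHTimeout: 5,                       // SSH connect timeout in seconds; illustrative value
		SSHType:    executor.SSHTypeBuiltin, // assumed constant; only SSHTypeNone is visible in this diff
	}
	_ = gOpt // passed through as the gOpt argument of the refactored manager methods
}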
+ +package manager + +import ( + "errors" + + "github.com/joomcode/errorx" + perrs "github.com/pingcap/errors" + operator "github.com/pingcap/tiup/pkg/cluster/operation" + "github.com/pingcap/tiup/pkg/cluster/spec" + "github.com/pingcap/tiup/pkg/cluster/task" + "github.com/pingcap/tiup/pkg/logger/log" + "github.com/pingcap/tiup/pkg/meta" +) + +// EnableCluster enable/disable the service in a cluster +func (m *Manager) EnableCluster(name string, options operator.Options, isEnable bool) error { + if isEnable { + log.Infof("Enabling cluster %s...", name) + } else { + log.Infof("Disabling cluster %s...", name) + } + + metadata, err := m.meta(name) + if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) { + return perrs.AddStack(err) + } + + topo := metadata.GetTopology() + base := metadata.GetBaseMeta() + + b := m.sshTaskBuilder(name, topo, base.User, options) + + if isEnable { + b = b.Func("EnableCluster", func(ctx *task.Context) error { + return operator.Enable(ctx, topo, options, isEnable) + }) + } else { + b = b.Func("DisableCluster", func(ctx *task.Context) error { + return operator.Enable(ctx, topo, options, isEnable) + }) + } + + t := b.Build() + + if err := t.Execute(task.NewContext()); err != nil { + if errorx.Cast(err) != nil { + // FIXME: Map possible task errors and give suggestions. + return err + } + return perrs.Trace(err) + } + + if isEnable { + log.Infof("Enabled cluster `%s` successfully", name) + } else { + log.Infof("Disabled cluster `%s` successfully", name) + } + + return nil +} + +// StartCluster start the cluster with specified name. +func (m *Manager) StartCluster(name string, options operator.Options, fn ...func(b *task.Builder, metadata spec.Metadata)) error { + log.Infof("Starting cluster %s...", name) + + metadata, err := m.meta(name) + if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) { + return perrs.AddStack(err) + } + + topo := metadata.GetTopology() + base := metadata.GetBaseMeta() + + tlsCfg, err := topo.TLSConfig(m.specManager.Path(name, spec.TLSCertKeyDir)) + if err != nil { + return perrs.AddStack(err) + } + + b := m.sshTaskBuilder(name, topo, base.User, options). + Func("StartCluster", func(ctx *task.Context) error { + return operator.Start(ctx, topo, options, tlsCfg) + }) + + for _, f := range fn { + f(b, metadata) + } + + t := b.Build() + + if err := t.Execute(task.NewContext()); err != nil { + if errorx.Cast(err) != nil { + // FIXME: Map possible task errors and give suggestions. + return err + } + return perrs.Trace(err) + } + + log.Infof("Started cluster `%s` successfully", name) + return nil +} + +// StopCluster stop the cluster. +func (m *Manager) StopCluster(name string, options operator.Options) error { + metadata, err := m.meta(name) + if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) { + return perrs.AddStack(err) + } + + topo := metadata.GetTopology() + base := metadata.GetBaseMeta() + + tlsCfg, err := topo.TLSConfig(m.specManager.Path(name, spec.TLSCertKeyDir)) + if err != nil { + return err + } + + t := m.sshTaskBuilder(name, topo, base.User, options). + Func("StopCluster", func(ctx *task.Context) error { + return operator.Stop(ctx, topo, options, tlsCfg) + }). + Build() + + if err := t.Execute(task.NewContext()); err != nil { + if errorx.Cast(err) != nil { + // FIXME: Map possible task errors and give suggestions. + return err + } + return perrs.Trace(err) + } + + log.Infof("Stopped cluster `%s` successfully", name) + return nil +} + +// RestartCluster restart the cluster. 
+func (m *Manager) RestartCluster(name string, options operator.Options) error { + metadata, err := m.meta(name) + if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) { + return perrs.AddStack(err) + } + + topo := metadata.GetTopology() + base := metadata.GetBaseMeta() + + tlsCfg, err := topo.TLSConfig(m.specManager.Path(name, spec.TLSCertKeyDir)) + if err != nil { + return err + } + + t := m.sshTaskBuilder(name, topo, base.User, options). + Func("RestartCluster", func(ctx *task.Context) error { + return operator.Restart(ctx, topo, options, tlsCfg) + }). + Build() + + if err := t.Execute(task.NewContext()); err != nil { + if errorx.Cast(err) != nil { + // FIXME: Map possible task errors and give suggestions. + return err + } + return perrs.Trace(err) + } + + log.Infof("Restarted cluster `%s` successfully", name) + return nil +} diff --git a/pkg/cluster/manager/builder.go b/pkg/cluster/manager/builder.go new file mode 100644 index 0000000000..cd97912caf --- /dev/null +++ b/pkg/cluster/manager/builder.go @@ -0,0 +1,545 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package manager + +import ( + "fmt" + "path/filepath" + "strings" + + "github.com/pingcap/tiup/pkg/cliutil" + "github.com/pingcap/tiup/pkg/cluster/executor" + operator "github.com/pingcap/tiup/pkg/cluster/operation" + "github.com/pingcap/tiup/pkg/cluster/spec" + "github.com/pingcap/tiup/pkg/cluster/task" + "github.com/pingcap/tiup/pkg/crypto" + "github.com/pingcap/tiup/pkg/environment" + "github.com/pingcap/tiup/pkg/meta" + pkgver "github.com/pingcap/tiup/pkg/repository/version" + "github.com/pingcap/tiup/pkg/set" +) + +// Dynamic reload Prometheus configuration +func buildDynReloadPromTasks(topo spec.Topology) []task.Task { + monitor := spec.FindComponent(topo, spec.ComponentPrometheus) + if monitor == nil { + return nil + } + instances := monitor.Instances() + if len(instances) == 0 { + return nil + } + var dynReloadTasks []task.Task + for _, inst := range monitor.Instances() { + dynReloadTasks = append(dynReloadTasks, task.NewBuilder().SystemCtl(inst.GetHost(), inst.ServiceName(), "reload", true).Build()) + } + return dynReloadTasks +} + +func buildScaleOutTask( + m *Manager, + name string, + metadata spec.Metadata, + mergedTopo spec.Topology, + opt ScaleOutOptions, + sshConnProps *cliutil.SSHConnectionProps, + newPart spec.Topology, + patchedComponents set.StringSet, + gOpt operator.Options, + afterDeploy func(b *task.Builder, newPart spec.Topology), + final func(b *task.Builder, name string, meta spec.Metadata), +) (task.Task, error) { + var ( + envInitTasks []task.Task // tasks which are used to initialize environment + downloadCompTasks []task.Task // tasks which are used to download components + deployCompTasks []task.Task // tasks which are used to copy components to remote host + refreshConfigTasks []task.Task // tasks which are used to refresh configuration + ) + + topo := metadata.GetTopology() + base := metadata.GetBaseMeta() + specManager := m.specManager + + tlsCfg, err := topo.TLSConfig(m.specManager.Path(name, spec.TLSCertKeyDir)) 
+ if err != nil { + return nil, err + } + + // Initialize the environments + initializedHosts := set.NewStringSet() + metadata.GetTopology().IterInstance(func(instance spec.Instance) { + initializedHosts.Insert(instance.GetHost()) + }) + // uninitializedHosts are hosts which haven't been initialized yet + uninitializedHosts := make(map[string]hostInfo) // host -> ssh-port, os, arch + newPart.IterInstance(func(instance spec.Instance) { + if host := instance.GetHost(); !initializedHosts.Exist(host) { + if _, found := uninitializedHosts[host]; found { + return + } + + uninitializedHosts[host] = hostInfo{ + ssh: instance.GetSSHPort(), + os: instance.OS(), + arch: instance.Arch(), + } + + var dirs []string + globalOptions := metadata.GetTopology().BaseTopo().GlobalOptions + for _, dir := range []string{globalOptions.DeployDir, globalOptions.DataDir, globalOptions.LogDir} { + for _, dirname := range strings.Split(dir, ",") { + if dirname == "" { + continue + } + dirs = append(dirs, spec.Abs(globalOptions.User, dirname)) + } + } + t := task.NewBuilder(). + RootSSH( + instance.GetHost(), + instance.GetSSHPort(), + opt.User, + sshConnProps.Password, + sshConnProps.IdentityFile, + sshConnProps.IdentityFilePassphrase, + gOpt.SSHTimeout, + gOpt.SSHType, + globalOptions.SSHType, + ). + EnvInit(instance.GetHost(), base.User, base.Group, opt.SkipCreateUser || globalOptions.User == opt.User). + Mkdir(globalOptions.User, instance.GetHost(), dirs...). + Build() + envInitTasks = append(envInitTasks, t) + } + }) + + // Download missing component + downloadCompTasks = convertStepDisplaysToTasks(buildDownloadCompTasks(base.Version, newPart, m.bindVersion)) + + var iterErr error + // Deploy the new topology and refresh the configuration + newPart.IterInstance(func(inst spec.Instance) { + version := m.bindVersion(inst.ComponentName(), base.Version) + deployDir := spec.Abs(base.User, inst.DeployDir()) + // data dir would be empty for components which don't need it + dataDirs := spec.MultiDirAbs(base.User, inst.DataDir()) + // log dir will always be with values, but might not used by the component + logDir := spec.Abs(base.User, inst.LogDir()) + + deployDirs := []string{ + deployDir, logDir, + filepath.Join(deployDir, "bin"), + filepath.Join(deployDir, "conf"), + filepath.Join(deployDir, "scripts"), + } + if topo.BaseTopo().GlobalOptions.TLSEnabled { + deployDirs = append(deployDirs, filepath.Join(deployDir, "tls")) + } + // Deploy component + tb := task.NewBuilder(). + UserSSH(inst.GetHost(), inst.GetSSHPort(), base.User, gOpt.SSHTimeout, gOpt.SSHType, topo.BaseTopo().GlobalOptions.SSHType). + Mkdir(base.User, inst.GetHost(), deployDirs...). + Mkdir(base.User, inst.GetHost(), dataDirs...) 
+ + srcPath := "" + if patchedComponents.Exist(inst.ComponentName()) { + srcPath = specManager.Path(name, spec.PatchDirName, inst.ComponentName()+".tar.gz") + } + + if deployerInstance, ok := inst.(DeployerInstance); ok { + deployerInstance.Deploy(tb, srcPath, deployDir, version, name, version) + } else { + // copy dependency component if needed + switch inst.ComponentName() { + case spec.ComponentTiSpark: + env := environment.GlobalEnv() + var sparkVer pkgver.Version + if sparkVer, _, iterErr = env.V1Repository().LatestStableVersion(spec.ComponentSpark, false); iterErr != nil { + return + } + tb = tb.DeploySpark(inst, sparkVer.String(), srcPath, deployDir) + default: + tb.CopyComponent( + inst.ComponentName(), + inst.OS(), + inst.Arch(), + version, + srcPath, + inst.GetHost(), + deployDir, + ) + } + } + // generate and transfer tls cert for instance + if topo.BaseTopo().GlobalOptions.TLSEnabled { + ca, err := crypto.ReadCA( + name, + m.specManager.Path(name, spec.TLSCertKeyDir, spec.TLSCACert), + m.specManager.Path(name, spec.TLSCertKeyDir, spec.TLSCAKey), + ) + if err != nil { + iterErr = err + return + } + tb = tb.TLSCert(inst, ca, meta.DirPaths{ + Deploy: deployDir, + Cache: m.specManager.Path(name, spec.TempConfigPath), + }) + } + + t := tb.ScaleConfig(name, + base.Version, + m.specManager, + topo, + inst, + base.User, + meta.DirPaths{ + Deploy: deployDir, + Data: dataDirs, + Log: logDir, + }, + ).Build() + deployCompTasks = append(deployCompTasks, t) + }) + if iterErr != nil { + return nil, iterErr + } + + hasImported := false + + mergedTopo.IterInstance(func(inst spec.Instance) { + deployDir := spec.Abs(base.User, inst.DeployDir()) + // data dir would be empty for components which don't need it + dataDirs := spec.MultiDirAbs(base.User, inst.DataDir()) + // log dir will always be with values, but might not used by the component + logDir := spec.Abs(base.User, inst.LogDir()) + + // Download and copy the latest component to remote if the cluster is imported from Ansible + tb := task.NewBuilder() + if inst.IsImported() { + switch compName := inst.ComponentName(); compName { + case spec.ComponentGrafana, spec.ComponentPrometheus, spec.ComponentAlertmanager: + version := m.bindVersion(compName, base.Version) + tb.Download(compName, inst.OS(), inst.Arch(), version). + CopyComponent(compName, inst.OS(), inst.Arch(), version, "", inst.GetHost(), deployDir) + } + hasImported = true + } + + // Refresh all configuration + t := tb.InitConfig(name, + base.Version, + m.specManager, + inst, + base.User, + true, // always ignore config check result in scale out + meta.DirPaths{ + Deploy: deployDir, + Data: dataDirs, + Log: logDir, + Cache: specManager.Path(name, spec.TempConfigPath), + }, + ).Build() + refreshConfigTasks = append(refreshConfigTasks, t) + }) + + // handle dir scheme changes + if hasImported { + if err := spec.HandleImportPathMigration(name); err != nil { + return task.NewBuilder().Build(), err + } + } + + // Deploy monitor relevant components to remote + dlTasks, dpTasks := buildMonitoredDeployTask( + m.bindVersion, + specManager, + name, + uninitializedHosts, + topo.BaseTopo().GlobalOptions, + topo.BaseTopo().MonitoredOptions, + base.Version, + gOpt, + ) + downloadCompTasks = append(downloadCompTasks, convertStepDisplaysToTasks(dlTasks)...) + deployCompTasks = append(deployCompTasks, convertStepDisplaysToTasks(dpTasks)...) + + builder := task.NewBuilder(). + SSHKeySet( + specManager.Path(name, "ssh", "id_rsa"), + specManager.Path(name, "ssh", "id_rsa.pub")). 
+ Parallel(false, downloadCompTasks...). + Parallel(false, envInitTasks...). + ClusterSSH(topo, base.User, gOpt.SSHTimeout, gOpt.SSHType, topo.BaseTopo().GlobalOptions.SSHType). + Parallel(false, deployCompTasks...) + + if afterDeploy != nil { + afterDeploy(builder, newPart) + } + + builder. + ClusterSSH(newPart, base.User, gOpt.SSHTimeout, gOpt.SSHType, topo.BaseTopo().GlobalOptions.SSHType). + Func("Save meta", func(_ *task.Context) error { + metadata.SetTopology(mergedTopo) + return m.specManager.SaveMeta(name, metadata) + }). + Func("StartCluster", func(ctx *task.Context) error { + return operator.Start(ctx, newPart, operator.Options{OptTimeout: gOpt.OptTimeout}, tlsCfg) + }). + Parallel(false, refreshConfigTasks...). + Parallel(false, buildDynReloadPromTasks(metadata.GetTopology())...) + + if final != nil { + final(builder, name, metadata) + } + + return builder.Build(), nil +} + +type hostInfo struct { + ssh int // ssh port of host + os string // operating system + arch string // cpu architecture + // vendor string +} + +// Deprecated +func convertStepDisplaysToTasks(t []*task.StepDisplay) []task.Task { + tasks := make([]task.Task, 0, len(t)) + for _, sd := range t { + tasks = append(tasks, sd) + } + return tasks +} + +func buildMonitoredDeployTask( + bindVersion spec.BindVersion, + specManager *spec.SpecManager, + name string, + uniqueHosts map[string]hostInfo, // host -> ssh-port, os, arch + globalOptions *spec.GlobalOptions, + monitoredOptions *spec.MonitoredOptions, + version string, + gOpt operator.Options, +) (downloadCompTasks []*task.StepDisplay, deployCompTasks []*task.StepDisplay) { + if monitoredOptions == nil { + return + } + + uniqueCompOSArch := set.NewStringSet() + // monitoring agents + for _, comp := range []string{spec.ComponentNodeExporter, spec.ComponentBlackboxExporter} { + version := bindVersion(comp, version) + + for host, info := range uniqueHosts { + // populate unique comp-os-arch set + key := fmt.Sprintf("%s-%s-%s", comp, info.os, info.arch) + if found := uniqueCompOSArch.Exist(key); !found { + uniqueCompOSArch.Insert(key) + downloadCompTasks = append(downloadCompTasks, task.NewBuilder(). + Download(comp, info.os, info.arch, version). + BuildAsStep(fmt.Sprintf(" - Download %s:%s (%s/%s)", comp, version, info.os, info.arch))) + } + + deployDir := spec.Abs(globalOptions.User, monitoredOptions.DeployDir) + // data dir would be empty for components which don't need it + dataDir := monitoredOptions.DataDir + // the default data_dir is relative to deploy_dir + if dataDir != "" && !strings.HasPrefix(dataDir, "/") { + dataDir = filepath.Join(deployDir, dataDir) + } + // log dir will always be with values, but might not used by the component + logDir := spec.Abs(globalOptions.User, monitoredOptions.LogDir) + // Deploy component + t := task.NewBuilder(). + UserSSH(host, info.ssh, globalOptions.User, gOpt.SSHTimeout, gOpt.SSHType, globalOptions.SSHType). + Mkdir(globalOptions.User, host, + deployDir, dataDir, logDir, + filepath.Join(deployDir, "bin"), + filepath.Join(deployDir, "conf"), + filepath.Join(deployDir, "scripts")). + CopyComponent( + comp, + info.os, + info.arch, + version, + "", + host, + deployDir, + ). + MonitoredConfig( + name, + comp, + host, + globalOptions.ResourceControl, + monitoredOptions, + globalOptions.User, + meta.DirPaths{ + Deploy: deployDir, + Data: []string{dataDir}, + Log: logDir, + Cache: specManager.Path(name, spec.TempConfigPath), + }, + ). 
+ BuildAsStep(fmt.Sprintf(" - Copy %s -> %s", comp, host)) + deployCompTasks = append(deployCompTasks, t) + } + } + return +} + +func buildRefreshMonitoredConfigTasks( + specManager *spec.SpecManager, + name string, + uniqueHosts map[string]hostInfo, // host -> ssh-port, os, arch + globalOptions spec.GlobalOptions, + monitoredOptions *spec.MonitoredOptions, + sshTimeout uint64, + sshType executor.SSHType, +) []*task.StepDisplay { + if monitoredOptions == nil { + return nil + } + + tasks := []*task.StepDisplay{} + // monitoring agents + for _, comp := range []string{spec.ComponentNodeExporter, spec.ComponentBlackboxExporter} { + for host, info := range uniqueHosts { + deployDir := spec.Abs(globalOptions.User, monitoredOptions.DeployDir) + // data dir would be empty for components which don't need it + dataDir := monitoredOptions.DataDir + // the default data_dir is relative to deploy_dir + if dataDir != "" && !strings.HasPrefix(dataDir, "/") { + dataDir = filepath.Join(deployDir, dataDir) + } + // log dir will always be with values, but might not used by the component + logDir := spec.Abs(globalOptions.User, monitoredOptions.LogDir) + // Generate configs + t := task.NewBuilder(). + UserSSH(host, info.ssh, globalOptions.User, sshTimeout, sshType, globalOptions.SSHType). + MonitoredConfig( + name, + comp, + host, + globalOptions.ResourceControl, + monitoredOptions, + globalOptions.User, + meta.DirPaths{ + Deploy: deployDir, + Data: []string{dataDir}, + Log: logDir, + Cache: specManager.Path(name, spec.TempConfigPath), + }, + ). + BuildAsStep(fmt.Sprintf(" - Refresh config %s -> %s", comp, host)) + tasks = append(tasks, t) + } + } + return tasks +} + +func buildRegenConfigTasks(m *Manager, name string, topo spec.Topology, base *spec.BaseMeta, nodes []string) ([]*task.StepDisplay, bool) { + var tasks []*task.StepDisplay + hasImported := false + deletedNodes := set.NewStringSet(nodes...) + + topo.IterInstance(func(instance spec.Instance) { + if deletedNodes.Exist(instance.ID()) { + return + } + compName := instance.ComponentName() + deployDir := spec.Abs(base.User, instance.DeployDir()) + // data dir would be empty for components which don't need it + dataDirs := spec.MultiDirAbs(base.User, instance.DataDir()) + // log dir will always be with values, but might not used by the component + logDir := spec.Abs(base.User, instance.LogDir()) + + // Download and copy the latest component to remote if the cluster is imported from Ansible + tb := task.NewBuilder() + if instance.IsImported() { + switch compName { + case spec.ComponentGrafana, spec.ComponentPrometheus, spec.ComponentAlertmanager: + version := m.bindVersion(compName, base.Version) + tb.Download(compName, instance.OS(), instance.Arch(), version). + CopyComponent( + compName, + instance.OS(), + instance.Arch(), + version, + "", // use default srcPath + instance.GetHost(), + deployDir, + ) + } + hasImported = true + } + + t := tb. + InitConfig( + name, + base.Version, + m.specManager, + instance, + base.User, + true, // always ignore config check result in scale in + meta.DirPaths{ + Deploy: deployDir, + Data: dataDirs, + Log: logDir, + Cache: m.specManager.Path(name, spec.TempConfigPath), + }, + ). 
+ BuildAsStep(fmt.Sprintf(" - Regenerate config %s -> %s", compName, instance.ID())) + tasks = append(tasks, t) + }) + + return tasks, hasImported +} + +// buildDownloadCompTasks build download component tasks +func buildDownloadCompTasks(clusterVersion string, topo spec.Topology, bindVersion spec.BindVersion) []*task.StepDisplay { + var tasks []*task.StepDisplay + uniqueTaskList := set.NewStringSet() + topo.IterInstance(func(inst spec.Instance) { + key := fmt.Sprintf("%s-%s-%s", inst.ComponentName(), inst.OS(), inst.Arch()) + if found := uniqueTaskList.Exist(key); !found { + uniqueTaskList.Insert(key) + + // we don't set version for tispark, so the lastest tispark will be used + var version string + if inst.ComponentName() == spec.ComponentTiSpark { + // download spark as dependency of tispark + tasks = append(tasks, buildDownloadSparkTask(inst)) + } else { + version = bindVersion(inst.ComponentName(), clusterVersion) + } + + t := task.NewBuilder(). + Download(inst.ComponentName(), inst.OS(), inst.Arch(), version). + BuildAsStep(fmt.Sprintf(" - Download %s:%s (%s/%s)", + inst.ComponentName(), version, inst.OS(), inst.Arch())) + tasks = append(tasks, t) + } + }) + return tasks +} + +// buildDownloadSparkTask build download task for spark, which is a dependency of tispark +// FIXME: this is a hack and should be replaced by dependency handling in manifest processing +func buildDownloadSparkTask(inst spec.Instance) *task.StepDisplay { + return task.NewBuilder(). + Download(spec.ComponentSpark, inst.OS(), inst.Arch(), ""). + BuildAsStep(fmt.Sprintf(" - Download %s: (%s/%s)", + spec.ComponentSpark, inst.OS(), inst.Arch())) +} diff --git a/pkg/cluster/manager/cacert.go b/pkg/cluster/manager/cacert.go new file mode 100644 index 0000000000..81eacf76b8 --- /dev/null +++ b/pkg/cluster/manager/cacert.go @@ -0,0 +1,103 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package manager + +import ( + "crypto/x509" + "encoding/pem" + "fmt" + "path/filepath" + + perrs "github.com/pingcap/errors" + "github.com/pingcap/tiup/pkg/cluster/spec" + "github.com/pingcap/tiup/pkg/crypto" + "github.com/pingcap/tiup/pkg/file" +) + +func genAndSaveClusterCA(name, tlsPath string) (*crypto.CertificateAuthority, error) { + ca, err := crypto.NewCA(name) + if err != nil { + return nil, err + } + + // save CA private key + if err := file.SaveFileWithBackup(filepath.Join(tlsPath, spec.TLSCAKey), ca.Key.Pem(), ""); err != nil { + return nil, perrs.Annotatef(err, "cannot save CA private key for %s", name) + } + + // save CA certificate + if err := file.SaveFileWithBackup( + filepath.Join(tlsPath, spec.TLSCACert), + pem.EncodeToMemory(&pem.Block{ + Type: "CERTIFICATE", + Bytes: ca.Cert.Raw, + }), ""); err != nil { + return nil, perrs.Annotatef(err, "cannot save CA certificate for %s", name) + } + + return ca, nil +} + +func genAndSaveClientCert(ca *crypto.CertificateAuthority, name, tlsPath string) error { + privKey, err := crypto.NewKeyPair(crypto.KeyTypeRSA, crypto.KeySchemeRSASSAPSSSHA256) + if err != nil { + return perrs.AddStack(err) + } + + // save client private key + if err := file.SaveFileWithBackup(filepath.Join(tlsPath, spec.TLSClientKey), privKey.Pem(), ""); err != nil { + return perrs.Annotatef(err, "cannot save client private key for %s", name) + } + + csr, err := privKey.CSR( + "tiup-cluster-client", + fmt.Sprintf("%s-client", name), + []string{}, []string{}, + ) + if err != nil { + return perrs.Annotatef(err, "cannot generate CSR of client certificate for %s", name) + } + cert, err := ca.Sign(csr) + if err != nil { + return perrs.Annotatef(err, "cannot sign client certificate for %s", name) + } + + // save client certificate + if err := file.SaveFileWithBackup( + filepath.Join(tlsPath, spec.TLSClientCert), + pem.EncodeToMemory(&pem.Block{ + Type: "CERTIFICATE", + Bytes: cert, + }), ""); err != nil { + return perrs.Annotatef(err, "cannot save client PEM certificate for %s", name) + } + + // save pfx format certificate + clientCert, err := x509.ParseCertificate(cert) + if err != nil { + return perrs.Annotatef(err, "cannot decode signed client certificate for %s", name) + } + pfxData, err := privKey.PKCS12(clientCert, ca) + if err != nil { + return perrs.Annotatef(err, "cannot encode client certificate to PKCS#12 format for %s", name) + } + if err := file.SaveFileWithBackup( + filepath.Join(tlsPath, spec.PFXClientCert), + pfxData, + ""); err != nil { + return perrs.Annotatef(err, "cannot save client PKCS#12 certificate for %s", name) + } + + return nil +} diff --git a/pkg/cluster/manager/cleanup.go b/pkg/cluster/manager/cleanup.go new file mode 100644 index 0000000000..7db1472236 --- /dev/null +++ b/pkg/cluster/manager/cleanup.go @@ -0,0 +1,84 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
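
genAndSaveClusterCA above persists the CA key and certificate to disk as PEM blocks of type CERTIFICATE. Below is a minimal, standard-library-only sketch of producing such a block for a freshly generated self-signed CA; tiup's own crypto package wraps the key generation and signing, and the common name used here is illustrative:

```go
package main

import (
	"crypto/rand"
	"crypto/rsa"
	"crypto/x509"
	"crypto/x509/pkix"
	"encoding/pem"
	"math/big"
	"os"
	"time"
)

func main() {
	// Generate a CA key pair; tiup's crypto.NewCA wraps comparable logic.
	key, err := rsa.GenerateKey(rand.Reader, 2048)
	if err != nil {
		panic(err)
	}
	tmpl := &x509.Certificate{
		SerialNumber:          big.NewInt(1),
		Subject:               pkix.Name{CommonName: "example-cluster"}, // illustrative name
		NotBefore:             time.Now(),
		NotAfter:              time.Now().AddDate(10, 0, 0),
		IsCA:                  true,
		KeyUsage:              x509.KeyUsageCertSign | x509.KeyUsageDigitalSignature,
		BasicConstraintsValid: true,
	}
	// Self-sign the CA certificate.
	der, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, &key.PublicKey, key)
	if err != nil {
		panic(err)
	}
	// Same PEM block type as the cluster CA saved by genAndSaveClusterCA.
	if err := pem.Encode(os.Stdout, &pem.Block{Type: "CERTIFICATE", Bytes: der}); err != nil {
		panic(err)
	}
}
```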
+ +package manager + +import ( + "github.com/fatih/color" + "github.com/joomcode/errorx" + perrs "github.com/pingcap/errors" + "github.com/pingcap/tiup/pkg/cliutil" + operator "github.com/pingcap/tiup/pkg/cluster/operation" + "github.com/pingcap/tiup/pkg/cluster/spec" + "github.com/pingcap/tiup/pkg/cluster/task" + "github.com/pingcap/tiup/pkg/logger/log" +) + +// CleanCluster cleans the cluster without destroying it +func (m *Manager) CleanCluster(name string, gOpt operator.Options, cleanOpt operator.Options, skipConfirm bool) error { + metadata, err := m.meta(name) + if err != nil { + return err + } + + topo := metadata.GetTopology() + base := metadata.GetBaseMeta() + + tlsCfg, err := topo.TLSConfig(m.specManager.Path(name, spec.TLSCertKeyDir)) + if err != nil { + return err + } + + if !skipConfirm { + target := "" + switch { + case cleanOpt.CleanupData && cleanOpt.CleanupLog: + target = "data and log" + case cleanOpt.CleanupData: + target = "data" + case cleanOpt.CleanupLog: + target = "log" + } + if err := cliutil.PromptForConfirmOrAbortError( + "This operation will clean %s %s cluster %s's %s.\nNodes will be ignored: %s\nRoles will be ignored: %s\nDo you want to continue? [y/N]:", + m.sysName, + color.HiYellowString(base.Version), + color.HiYellowString(name), + target, + cleanOpt.RetainDataNodes, + cleanOpt.RetainDataRoles); err != nil { + return err + } + log.Infof("Cleanup cluster...") + } + + t := m.sshTaskBuilder(name, topo, base.User, gOpt). + Func("StopCluster", func(ctx *task.Context) error { + return operator.Stop(ctx, topo, operator.Options{}, tlsCfg) + }). + Func("CleanupCluster", func(ctx *task.Context) error { + return operator.Cleanup(ctx, topo, cleanOpt) + }). + Build() + + if err := t.Execute(task.NewContext()); err != nil { + if errorx.Cast(err) != nil { + // FIXME: Map possible task errors and give suggestions. + return err + } + return perrs.Trace(err) + } + + log.Infof("Cleanup cluster `%s` successfully", name) + return nil +} diff --git a/pkg/cluster/manager/deploy.go b/pkg/cluster/manager/deploy.go new file mode 100644 index 0000000000..a65bddfab9 --- /dev/null +++ b/pkg/cluster/manager/deploy.go @@ -0,0 +1,351 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package manager + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/fatih/color" + "github.com/joomcode/errorx" + perrs "github.com/pingcap/errors" + "github.com/pingcap/tiup/pkg/cliutil" + "github.com/pingcap/tiup/pkg/cluster/clusterutil" + "github.com/pingcap/tiup/pkg/cluster/executor" + operator "github.com/pingcap/tiup/pkg/cluster/operation" + "github.com/pingcap/tiup/pkg/cluster/spec" + "github.com/pingcap/tiup/pkg/cluster/task" + "github.com/pingcap/tiup/pkg/crypto" + "github.com/pingcap/tiup/pkg/environment" + "github.com/pingcap/tiup/pkg/logger/log" + "github.com/pingcap/tiup/pkg/meta" + pkgver "github.com/pingcap/tiup/pkg/repository/version" + "github.com/pingcap/tiup/pkg/set" + "github.com/pingcap/tiup/pkg/utils" +) + +// DeployOptions contains the options for scale out. 
+// TODO: merge ScaleOutOptions, should check config too when scale out. +type DeployOptions struct { + User string // username to login to the SSH server + SkipCreateUser bool // don't create the user + IdentityFile string // path to the private key file + UsePassword bool // use password instead of identity file for ssh connection + IgnoreConfigCheck bool // ignore config check result + NoLabels bool // don't check labels for TiKV instance +} + +// DeployerInstance is a instance can deploy to a target deploy directory. +type DeployerInstance interface { + Deploy(b *task.Builder, srcPath string, deployDir string, version string, name string, clusterVersion string) +} + +// Deploy a cluster. +func (m *Manager) Deploy( + name string, + clusterVersion string, + topoFile string, + opt DeployOptions, + afterDeploy func(b *task.Builder, newPart spec.Topology), + skipConfirm bool, + gOpt operator.Options, +) error { + if err := clusterutil.ValidateClusterNameOrError(name); err != nil { + return err + } + + exist, err := m.specManager.Exist(name) + if err != nil { + return perrs.AddStack(err) + } + + if exist { + // FIXME: When change to use args, the suggestion text need to be updatem. + return errDeployNameDuplicate. + New("Cluster name '%s' is duplicated", name). + WithProperty(cliutil.SuggestionFromFormat("Please specify another cluster name")) + } + + metadata := m.specManager.NewMetadata() + topo := metadata.GetTopology() + + if err := spec.ParseTopologyYaml(topoFile, topo); err != nil { + return err + } + + spec.ExpandRelativeDir(topo) + + base := topo.BaseTopo() + if sshType := gOpt.SSHType; sshType != "" { + base.GlobalOptions.SSHType = sshType + } + + if topo, ok := topo.(*spec.Specification); ok && !opt.NoLabels { + // Check if TiKV's label set correctly + lbs, err := topo.LocationLabels() + if err != nil { + return err + } + if err := spec.CheckTiKVLabels(lbs, topo); err != nil { + return perrs.Errorf("check TiKV label failed, please fix that before continue:\n%s", err) + } + } + + clusterList, err := m.specManager.GetAllClusters() + if err != nil { + return err + } + if err := spec.CheckClusterPortConflict(clusterList, name, topo); err != nil { + return err + } + if err := spec.CheckClusterDirConflict(clusterList, name, topo); err != nil { + return err + } + + if !skipConfirm { + if err := m.confirmTopology(name, clusterVersion, topo, set.NewStringSet()); err != nil { + return err + } + } + + var sshConnProps *cliutil.SSHConnectionProps = &cliutil.SSHConnectionProps{} + if gOpt.SSHType != executor.SSHTypeNone { + var err error + if sshConnProps, err = cliutil.ReadIdentityFileOrPassword(opt.IdentityFile, opt.UsePassword); err != nil { + return err + } + } + + if err := os.MkdirAll(m.specManager.Path(name), 0755); err != nil { + return errorx.InitializationFailed. + Wrap(err, "Failed to create cluster metadata directory '%s'", m.specManager.Path(name)). 
+ WithProperty(cliutil.SuggestionFromString("Please check file system permissions and try again.")) + } + + var ( + envInitTasks []*task.StepDisplay // tasks which are used to initialize environment + downloadCompTasks []*task.StepDisplay // tasks which are used to download components + deployCompTasks []*task.StepDisplay // tasks which are used to copy components to remote host + ) + + // Initialize environment + uniqueHosts := make(map[string]hostInfo) // host -> ssh-port, os, arch + globalOptions := base.GlobalOptions + + // generate CA and client cert for TLS enabled cluster + var ca *crypto.CertificateAuthority + if globalOptions.TLSEnabled { + // generate CA + tlsPath := m.specManager.Path(name, spec.TLSCertKeyDir) + if err := utils.CreateDir(tlsPath); err != nil { + return err + } + ca, err = genAndSaveClusterCA(name, tlsPath) + if err != nil { + return err + } + + // generate client cert + if err = genAndSaveClientCert(ca, name, tlsPath); err != nil { + return err + } + } + + var iterErr error // error when itering over instances + iterErr = nil + topo.IterInstance(func(inst spec.Instance) { + if _, found := uniqueHosts[inst.GetHost()]; !found { + // check for "imported" parameter, it can not be true when scaling out + if inst.IsImported() { + iterErr = errors.New( + "'imported' is set to 'true' for new instance, this is only used " + + "for instances imported from tidb-ansible and make no sense when " + + "deploying new instances, please delete the line or set it to 'false' for new instances") + return // skip the host to avoid issues + } + + uniqueHosts[inst.GetHost()] = hostInfo{ + ssh: inst.GetSSHPort(), + os: inst.OS(), + arch: inst.Arch(), + } + var dirs []string + for _, dir := range []string{globalOptions.DeployDir, globalOptions.LogDir} { + if dir == "" { + continue + } + dirs = append(dirs, spec.Abs(globalOptions.User, dir)) + } + // the default, relative path of data dir is under deploy dir + if strings.HasPrefix(globalOptions.DataDir, "/") { + dirs = append(dirs, globalOptions.DataDir) + } + t := task.NewBuilder(). + RootSSH( + inst.GetHost(), + inst.GetSSHPort(), + opt.User, + sshConnProps.Password, + sshConnProps.IdentityFile, + sshConnProps.IdentityFilePassphrase, + gOpt.SSHTimeout, + gOpt.SSHType, + globalOptions.SSHType, + ). + EnvInit(inst.GetHost(), globalOptions.User, globalOptions.Group, opt.SkipCreateUser || globalOptions.User == opt.User). + Mkdir(globalOptions.User, inst.GetHost(), dirs...). 
+ BuildAsStep(fmt.Sprintf(" - Prepare %s:%d", inst.GetHost(), inst.GetSSHPort())) + envInitTasks = append(envInitTasks, t) + } + }) + + if iterErr != nil { + return iterErr + } + + // Download missing component + downloadCompTasks = buildDownloadCompTasks(clusterVersion, topo, m.bindVersion) + + // Deploy components to remote + topo.IterInstance(func(inst spec.Instance) { + version := m.bindVersion(inst.ComponentName(), clusterVersion) + deployDir := spec.Abs(globalOptions.User, inst.DeployDir()) + // data dir would be empty for components which don't need it + dataDirs := spec.MultiDirAbs(globalOptions.User, inst.DataDir()) + // log dir will always be with values, but might not used by the component + logDir := spec.Abs(globalOptions.User, inst.LogDir()) + // Deploy component + // prepare deployment server + deployDirs := []string{ + deployDir, logDir, + filepath.Join(deployDir, "bin"), + filepath.Join(deployDir, "conf"), + filepath.Join(deployDir, "scripts"), + } + if globalOptions.TLSEnabled { + deployDirs = append(deployDirs, filepath.Join(deployDir, "tls")) + } + t := task.NewBuilder(). + UserSSH(inst.GetHost(), inst.GetSSHPort(), globalOptions.User, gOpt.SSHTimeout, gOpt.SSHType, globalOptions.SSHType). + Mkdir(globalOptions.User, inst.GetHost(), deployDirs...). + Mkdir(globalOptions.User, inst.GetHost(), dataDirs...) + + if deployerInstance, ok := inst.(DeployerInstance); ok { + deployerInstance.Deploy(t, "", deployDir, version, name, clusterVersion) + } else { + // copy dependency component if needed + switch inst.ComponentName() { + case spec.ComponentTiSpark: + env := environment.GlobalEnv() + var sparkVer pkgver.Version + if sparkVer, _, iterErr = env.V1Repository().LatestStableVersion(spec.ComponentSpark, false); iterErr != nil { + return + } + t = t.DeploySpark(inst, sparkVer.String(), "" /* default srcPath */, deployDir) + default: + t = t.CopyComponent( + inst.ComponentName(), + inst.OS(), + inst.Arch(), + version, + "", // use default srcPath + inst.GetHost(), + deployDir, + ) + } + } + + // generate and transfer tls cert for instance + if globalOptions.TLSEnabled { + t = t.TLSCert(inst, ca, meta.DirPaths{ + Deploy: deployDir, + Cache: m.specManager.Path(name, spec.TempConfigPath), + }) + } + + // generate configs for the component + t = t.InitConfig( + name, + clusterVersion, + m.specManager, + inst, + globalOptions.User, + opt.IgnoreConfigCheck, + meta.DirPaths{ + Deploy: deployDir, + Data: dataDirs, + Log: logDir, + Cache: m.specManager.Path(name, spec.TempConfigPath), + }, + ) + + deployCompTasks = append(deployCompTasks, + t.BuildAsStep(fmt.Sprintf(" - Copy %s -> %s", inst.ComponentName(), inst.GetHost())), + ) + }) + + if iterErr != nil { + return iterErr + } + + // Deploy monitor relevant components to remote + dlTasks, dpTasks := buildMonitoredDeployTask( + m.bindVersion, + m.specManager, + name, + uniqueHosts, + globalOptions, + topo.GetMonitoredOptions(), + clusterVersion, + gOpt, + ) + downloadCompTasks = append(downloadCompTasks, dlTasks...) + deployCompTasks = append(deployCompTasks, dpTasks...) + + builder := task.NewBuilder(). + Step("+ Generate SSH keys", + task.NewBuilder().SSHKeyGen(m.specManager.Path(name, "ssh", "id_rsa")).Build()). + ParallelStep("+ Download TiDB components", false, downloadCompTasks...). + ParallelStep("+ Initialize target host environments", false, envInitTasks...). + ParallelStep("+ Copy files", false, deployCompTasks...) 
+ + if afterDeploy != nil { + afterDeploy(builder, topo) + } + + t := builder.Build() + + if err := t.Execute(task.NewContext()); err != nil { + if errorx.Cast(err) != nil { + // FIXME: Map possible task errors and give suggestions. + return err + } + return perrs.AddStack(err) + } + + metadata.SetUser(globalOptions.User) + metadata.SetVersion(clusterVersion) + err = m.specManager.SaveMeta(name, metadata) + + if err != nil { + return perrs.AddStack(err) + } + + hint := color.New(color.Bold).Sprintf("%s start %s", cliutil.OsArgs0(), name) + log.Infof("Deployed cluster `%s` successfully, you can start the cluster via `%s`", name, hint) + return nil +} diff --git a/pkg/cluster/manager/destroy.go b/pkg/cluster/manager/destroy.go new file mode 100644 index 0000000000..da41f9fc75 --- /dev/null +++ b/pkg/cluster/manager/destroy.go @@ -0,0 +1,148 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package manager + +import ( + "errors" + "fmt" + + "github.com/fatih/color" + "github.com/joomcode/errorx" + perrs "github.com/pingcap/errors" + "github.com/pingcap/tiup/pkg/cliutil" + operator "github.com/pingcap/tiup/pkg/cluster/operation" + "github.com/pingcap/tiup/pkg/cluster/spec" + "github.com/pingcap/tiup/pkg/cluster/task" + "github.com/pingcap/tiup/pkg/logger/log" + "github.com/pingcap/tiup/pkg/meta" +) + +// DestroyCluster destroy the cluster. +func (m *Manager) DestroyCluster(name string, gOpt operator.Options, destroyOpt operator.Options, skipConfirm bool) error { + metadata, err := m.meta(name) + if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) && + !errors.Is(perrs.Cause(err), spec.ErrNoTiSparkMaster) && + !errors.Is(perrs.Cause(err), spec.ErrMultipleTiSparkMaster) && + !errors.Is(perrs.Cause(err), spec.ErrMultipleTisparkWorker) { + return perrs.AddStack(err) + } + + topo := metadata.GetTopology() + base := metadata.GetBaseMeta() + + tlsCfg, err := topo.TLSConfig(m.specManager.Path(name, spec.TLSCertKeyDir)) + if err != nil { + return err + } + + if !skipConfirm { + if err := cliutil.PromptForConfirmOrAbortError( + "This operation will destroy %s %s cluster %s and its data.\nDo you want to continue? [y/N]:", + m.sysName, + color.HiYellowString(base.Version), + color.HiYellowString(name)); err != nil { + return err + } + log.Infof("Destroying cluster...") + } + + t := m.sshTaskBuilder(name, topo, base.User, gOpt). + Func("StopCluster", func(ctx *task.Context) error { + return operator.Stop(ctx, topo, operator.Options{Force: destroyOpt.Force}, tlsCfg) + }). + Func("DestroyCluster", func(ctx *task.Context) error { + return operator.Destroy(ctx, topo, destroyOpt) + }). + Build() + + if err := t.Execute(task.NewContext()); err != nil { + if errorx.Cast(err) != nil { + // FIXME: Map possible task errors and give suggestions. 
+ return err + } + return perrs.Trace(err) + } + + if err := m.specManager.Remove(name); err != nil { + return perrs.Trace(err) + } + + log.Infof("Destroyed cluster `%s` successfully", name) + return nil +} + +// DestroyTombstone destroy and remove instances that is in tombstone state +func (m *Manager) DestroyTombstone( + name string, + gOpt operator.Options, + skipConfirm bool, +) error { + metadata, err := m.meta(name) + // allow specific validation errors so that user can recover a broken + // cluster if it is somehow in a bad state. + if err != nil && + !errors.Is(perrs.Cause(err), spec.ErrNoTiSparkMaster) { + return perrs.AddStack(err) + } + + topo := metadata.GetTopology() + base := metadata.GetBaseMeta() + + clusterMeta := metadata.(*spec.ClusterMeta) + cluster := clusterMeta.Topology + + if !operator.NeedCheckTombstone(cluster) { + return nil + } + + tlsCfg, err := topo.TLSConfig(m.specManager.Path(name, spec.TLSCertKeyDir)) + if err != nil { + return err + } + + b := m.sshTaskBuilder(name, topo, base.User, gOpt) + + var nodes []string + b. + Func("FindTomestoneNodes", func(ctx *task.Context) (err error) { + nodes, err = operator.DestroyTombstone(ctx, cluster, true /* returnNodesOnly */, gOpt, tlsCfg) + if !skipConfirm { + err = cliutil.PromptForConfirmOrAbortError( + color.HiYellowString(fmt.Sprintf("Will destroy these nodes: %v\nDo you confirm this action? [y/N]:", nodes)), + ) + if err != nil { + return err + } + } + log.Infof("Start destroy Tombstone nodes: %v ...", nodes) + return err + }). + ClusterOperate(cluster, operator.DestroyTombstoneOperation, gOpt, tlsCfg). + UpdateMeta(name, clusterMeta, nodes). + UpdateTopology(name, m.specManager.Path(name), clusterMeta, nodes) + + regenConfigTasks, _ := buildRegenConfigTasks(m, name, topo, base, nodes) + t := b.ParallelStep("+ Refresh instance configs", true, regenConfigTasks...).Parallel(true, buildDynReloadPromTasks(metadata.GetTopology())...).Build() + if err := t.Execute(task.NewContext()); err != nil { + if errorx.Cast(err) != nil { + // FIXME: Map possible task errors and give suggestions. + return err + } + return perrs.Trace(err) + } + + log.Infof("Destroy success") + + return nil +} diff --git a/pkg/cluster/manager/display.go b/pkg/cluster/manager/display.go new file mode 100644 index 0000000000..61026bac6a --- /dev/null +++ b/pkg/cluster/manager/display.go @@ -0,0 +1,196 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package manager + +import ( + "errors" + "fmt" + "sort" + "strings" + "time" + + "github.com/fatih/color" + perrs "github.com/pingcap/errors" + "github.com/pingcap/tiup/pkg/cliutil" + "github.com/pingcap/tiup/pkg/cluster/api" + operator "github.com/pingcap/tiup/pkg/cluster/operation" + "github.com/pingcap/tiup/pkg/cluster/spec" + "github.com/pingcap/tiup/pkg/cluster/task" + "github.com/pingcap/tiup/pkg/logger/log" + "github.com/pingcap/tiup/pkg/meta" + "github.com/pingcap/tiup/pkg/set" + "github.com/pingcap/tiup/pkg/utils" +) + +// Display cluster meta and topology. 
+func (m *Manager) Display(name string, opt operator.Options) error { + metadata, err := m.meta(name) + if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) && + !errors.Is(perrs.Cause(err), spec.ErrNoTiSparkMaster) { + return perrs.AddStack(err) + } + + topo := metadata.GetTopology() + base := metadata.GetBaseMeta() + // display cluster meta + cyan := color.New(color.FgCyan, color.Bold) + fmt.Printf("Cluster type: %s\n", cyan.Sprint(m.sysName)) + fmt.Printf("Cluster name: %s\n", cyan.Sprint(name)) + fmt.Printf("Cluster version: %s\n", cyan.Sprint(base.Version)) + fmt.Printf("SSH type: %s\n", cyan.Sprint(topo.BaseTopo().GlobalOptions.SSHType)) + + // display TLS info + if topo.BaseTopo().GlobalOptions.TLSEnabled { + fmt.Printf("TLS encryption: %s\n", cyan.Sprint("enabled")) + fmt.Printf("CA certificate: %s\n", cyan.Sprint( + m.specManager.Path(name, spec.TLSCertKeyDir, spec.TLSCACert), + )) + fmt.Printf("Client private key: %s\n", cyan.Sprint( + m.specManager.Path(name, spec.TLSCertKeyDir, spec.TLSClientKey), + )) + fmt.Printf("Client certificate: %s\n", cyan.Sprint( + m.specManager.Path(name, spec.TLSCertKeyDir, spec.TLSClientCert), + )) + } + + // display topology + clusterTable := [][]string{ + // Header + {"ID", "Role", "Host", "Ports", "OS/Arch", "Status", "Data Dir", "Deploy Dir"}, + } + + ctx := task.NewContext() + err = ctx.SetSSHKeySet(m.specManager.Path(name, "ssh", "id_rsa"), + m.specManager.Path(name, "ssh", "id_rsa.pub")) + if err != nil { + return perrs.AddStack(err) + } + + err = ctx.SetClusterSSH(topo, base.User, opt.SSHTimeout, opt.SSHType, topo.BaseTopo().GlobalOptions.SSHType) + if err != nil { + return perrs.AddStack(err) + } + + filterRoles := set.NewStringSet(opt.Roles...) + filterNodes := set.NewStringSet(opt.Nodes...) + pdList := topo.BaseTopo().MasterList + tlsCfg, err := topo.TLSConfig(m.specManager.Path(name, spec.TLSCertKeyDir)) + if err != nil { + return err + } + for _, comp := range topo.ComponentsByStartOrder() { + for _, ins := range comp.Instances() { + // apply role filter + if len(filterRoles) > 0 && !filterRoles.Exist(ins.Role()) { + continue + } + // apply node filter + if len(filterNodes) > 0 && !filterNodes.Exist(ins.ID()) { + continue + } + + dataDir := "-" + insDirs := ins.UsedDirs() + deployDir := insDirs[0] + if len(insDirs) > 1 { + dataDir = insDirs[1] + } + + status := ins.Status(tlsCfg, pdList...) 
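
Right after this point, the loop falls back to querying the service status over SSH whenever the component itself reports `-`. A standalone sketch of the parsing used there, under the assumption that the returned text resembles systemd's `Active: active (running) since ...` line:

```go
package main

import (
	"fmt"
	"strings"
)

// toDisplayStatus mirrors the fallback parsing below: the second
// whitespace-separated token of the status text decides the displayed state.
// The sample lines assume a systemd-style "Active: ..." format.
func toDisplayStatus(active string) string {
	parts := strings.Split(strings.TrimSpace(active), " ")
	if len(parts) > 2 {
		if parts[1] == "active" {
			return "Up"
		}
		return parts[1]
	}
	return "-"
}

func main() {
	fmt.Println(toDisplayStatus("Active: active (running) since Mon 2020-11-02 10:00:00 UTC"))
	fmt.Println(toDisplayStatus("Active: inactive (dead)"))
}
```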
+ // Query the service status + if status == "-" { + e, found := ctx.GetExecutor(ins.GetHost()) + if found { + active, _ := operator.GetServiceStatus(e, ins.ServiceName()) + if parts := strings.Split(strings.TrimSpace(active), " "); len(parts) > 2 { + if parts[1] == "active" { + status = "Up" + } else { + status = parts[1] + } + } + } + } + clusterTable = append(clusterTable, []string{ + color.CyanString(ins.ID()), + ins.Role(), + ins.GetHost(), + utils.JoinInt(ins.UsedPorts(), "/"), + cliutil.OsArch(ins.OS(), ins.Arch()), + formatInstanceStatus(status), + dataDir, + deployDir, + }) + } + } + + // Sort by role,host,ports + sort.Slice(clusterTable[1:], func(i, j int) bool { + lhs, rhs := clusterTable[i+1], clusterTable[j+1] + // column: 1 => role, 2 => host, 3 => ports + for _, col := range []int{1, 2} { + if lhs[col] != rhs[col] { + return lhs[col] < rhs[col] + } + } + return lhs[3] < rhs[3] + }) + + cliutil.PrintTable(clusterTable, true) + fmt.Printf("Total nodes: %d\n", len(clusterTable)-1) + + if t, ok := topo.(*spec.Specification); ok { + // Check if TiKV's label set correctly + pdClient := api.NewPDClient(pdList, 10*time.Second, tlsCfg) + if lbs, err := pdClient.GetLocationLabels(); err != nil { + log.Debugf("get location labels from pd failed: %v", err) + } else if err := spec.CheckTiKVLabels(lbs, pdClient); err != nil { + color.Yellow("\nWARN: there is something wrong with TiKV labels, which may cause data losing:\n%v", err) + } + + // Check if there is some instance in tombstone state + nodes, _ := operator.DestroyTombstone(ctx, t, true /* returnNodesOnly */, opt, tlsCfg) + if len(nodes) != 0 { + color.Green("There are some nodes can be pruned: \n\tNodes: %+v\n\tYou can destroy them with the command: `tiup cluster prune %s`", nodes, name) + } + } + + return nil +} + +func formatInstanceStatus(status string) string { + lowercaseStatus := strings.ToLower(status) + + startsWith := func(prefixs ...string) bool { + for _, prefix := range prefixs { + if strings.HasPrefix(lowercaseStatus, prefix) { + return true + } + } + return false + } + + switch { + case startsWith("up|l"): // up|l, up|l|ui + return color.HiGreenString(status) + case startsWith("up"): + return color.GreenString(status) + case startsWith("down", "err"): // down, down|ui + return color.RedString(status) + case startsWith("tombstone", "disconnected"), strings.Contains(status, "offline"): + return color.YellowString(status) + default: + return status + } +} diff --git a/pkg/cluster/manager/edit_config.go b/pkg/cluster/manager/edit_config.go new file mode 100644 index 0000000000..8e0c5082e0 --- /dev/null +++ b/pkg/cluster/manager/edit_config.go @@ -0,0 +1,145 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
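
The edit_config.go file that follows re-parses the edited topology with `yaml.UnmarshalStrict`, so a key mistyped in the editor is reported instead of being silently dropped. A small sketch of that behaviour, with a made-up miniTopo type standing in for the real spec types:

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v2"
)

// miniTopo is a made-up stand-in for the real topology types in pkg/cluster/spec.
type miniTopo struct {
	DeployDir string `yaml:"deploy_dir"`
	SSHPort   int    `yaml:"ssh_port"`
}

func main() {
	// `deploy_dirr` is a typo: UnmarshalStrict reports it as an unknown field
	// instead of dropping it, which is why editTopo can refuse the bad edit.
	bad := []byte("deploy_dirr: /data/deploy\nssh_port: 22\n")
	var t miniTopo
	if err := yaml.UnmarshalStrict(bad, &t); err != nil {
		fmt.Println("rejected edit:", err)
	}
}
```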
+ +package manager + +import ( + "bytes" + "errors" + "fmt" + "io" + "io/ioutil" + "os" + + "github.com/fatih/color" + perrs "github.com/pingcap/errors" + "github.com/pingcap/tiup/pkg/cliutil" + "github.com/pingcap/tiup/pkg/cluster/spec" + "github.com/pingcap/tiup/pkg/logger/log" + "github.com/pingcap/tiup/pkg/meta" + "github.com/pingcap/tiup/pkg/utils" + "gopkg.in/yaml.v2" +) + +// EditConfig lets the user edit the cluster's config. +func (m *Manager) EditConfig(name string, skipConfirm bool) error { + metadata, err := m.meta(name) + if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) { + return perrs.AddStack(err) + } + + topo := metadata.GetTopology() + + data, err := yaml.Marshal(topo) + if err != nil { + return perrs.AddStack(err) + } + + newTopo, err := m.editTopo(topo, data, skipConfirm) + if err != nil { + return perrs.AddStack(err) + } + + if newTopo == nil { + return nil + } + + log.Infof("Apply the change...") + metadata.SetTopology(newTopo) + err = m.specManager.SaveMeta(name, metadata) + if err != nil { + return perrs.Annotate(err, "failed to save meta") + } + + log.Infof("Apply change successfully, please use `%s reload %s [-N ] [-R ]` to reload config.", cliutil.OsArgs0(), name) + return nil +} + +// 1. Write Topology to a temporary file. +// 2. Open file in editor. +// 3. Check and update Topology. +// 4. Save meta file. +func (m *Manager) editTopo(origTopo spec.Topology, data []byte, skipConfirm bool) (spec.Topology, error) { + file, err := ioutil.TempFile(os.TempDir(), "*") + if err != nil { + return nil, perrs.AddStack(err) + } + + name := file.Name() + + _, err = io.Copy(file, bytes.NewReader(data)) + if err != nil { + return nil, perrs.AddStack(err) + } + + err = file.Close() + if err != nil { + return nil, perrs.AddStack(err) + } + + err = utils.OpenFileInEditor(name) + if err != nil { + return nil, perrs.AddStack(err) + } + + // Now user finish editing the file. + newData, err := ioutil.ReadFile(name) + if err != nil { + return nil, perrs.AddStack(err) + } + + newTopo := m.specManager.NewMetadata().GetTopology() + err = yaml.UnmarshalStrict(newData, newTopo) + if err != nil { + fmt.Print(color.RedString("New topology could not be saved: ")) + log.Infof("Failed to parse topology file: %v", err) + if !cliutil.PromptForConfirmNo("Do you want to continue editing? [Y/n]: ") { + return m.editTopo(origTopo, newData, skipConfirm) + } + log.Infof("Nothing changed.") + return nil, nil + } + + // report error if immutable field has been changed + if err := utils.ValidateSpecDiff(origTopo, newTopo); err != nil { + fmt.Print(color.RedString("New topology could not be saved: ")) + log.Errorf("%s", err) + if !cliutil.PromptForConfirmNo("Do you want to continue editing? [Y/n]: ") { + return m.editTopo(origTopo, newData, skipConfirm) + } + log.Infof("Nothing changed.") + return nil, nil + } + + origData, err := yaml.Marshal(origTopo) + if err != nil { + return nil, perrs.AddStack(err) + } + + if bytes.Equal(origData, newData) { + log.Infof("The file has nothing changed") + return nil, nil + } + + utils.ShowDiff(string(origData), string(newData), os.Stdout) + + if !skipConfirm { + if err := cliutil.PromptForConfirmOrAbortError( + color.HiYellowString("Please check change highlight above, do you want to apply the change? 
[y/N]:"), + ); err != nil { + return nil, err + } + } + + return newTopo, nil +} diff --git a/pkg/cluster/manager/exec.go b/pkg/cluster/manager/exec.go new file mode 100644 index 0000000000..28a64c40c3 --- /dev/null +++ b/pkg/cluster/manager/exec.go @@ -0,0 +1,100 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package manager + +import ( + "github.com/fatih/color" + "github.com/joomcode/errorx" + perrs "github.com/pingcap/errors" + operator "github.com/pingcap/tiup/pkg/cluster/operation" + "github.com/pingcap/tiup/pkg/cluster/spec" + "github.com/pingcap/tiup/pkg/cluster/task" + "github.com/pingcap/tiup/pkg/logger/log" + "github.com/pingcap/tiup/pkg/set" +) + +// ExecOptions for exec shell commanm. +type ExecOptions struct { + Command string + Sudo bool +} + +// Exec shell command on host in the tidb cluster. +func (m *Manager) Exec(name string, opt ExecOptions, gOpt operator.Options) error { + metadata, err := m.meta(name) + if err != nil { + return perrs.AddStack(err) + } + + topo := metadata.GetTopology() + base := metadata.GetBaseMeta() + + filterRoles := set.NewStringSet(gOpt.Roles...) + filterNodes := set.NewStringSet(gOpt.Nodes...) + + var shellTasks []task.Task + uniqueHosts := map[string]int{} // host -> ssh-port + topo.IterInstance(func(inst spec.Instance) { + if _, found := uniqueHosts[inst.GetHost()]; !found { + if len(gOpt.Roles) > 0 && !filterRoles.Exist(inst.Role()) { + return + } + + if len(gOpt.Nodes) > 0 && !filterNodes.Exist(inst.GetHost()) { + return + } + + uniqueHosts[inst.GetHost()] = inst.GetSSHPort() + } + }) + + for host := range uniqueHosts { + shellTasks = append(shellTasks, + task.NewBuilder(). + Shell(host, opt.Command, opt.Sudo). + Build()) + } + + t := m.sshTaskBuilder(name, topo, base.User, gOpt). + Parallel(false, shellTasks...). + Build() + + execCtx := task.NewContext() + if err := t.Execute(execCtx); err != nil { + if errorx.Cast(err) != nil { + // FIXME: Map possible task errors and give suggestions. + return err + } + return perrs.Trace(err) + } + + // print outputs + for host := range uniqueHosts { + stdout, stderr, ok := execCtx.GetOutputs(host) + if !ok { + continue + } + log.Infof("Outputs of %s on %s:", + color.CyanString(opt.Command), + color.CyanString(host)) + if len(stdout) > 0 { + log.Infof("%s:\n%s", color.GreenString("stdout"), stdout) + } + if len(stderr) > 0 { + log.Infof("%s:\n%s", color.RedString("stderr"), stderr) + } + } + + return nil +} diff --git a/pkg/cluster/manager/list.go b/pkg/cluster/manager/list.go new file mode 100644 index 0000000000..f1feb10a83 --- /dev/null +++ b/pkg/cluster/manager/list.go @@ -0,0 +1,57 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package manager + +import ( + "errors" + + perrs "github.com/pingcap/errors" + "github.com/pingcap/tiup/pkg/cliutil" + "github.com/pingcap/tiup/pkg/cluster/spec" + "github.com/pingcap/tiup/pkg/meta" +) + +// ListCluster list the clusters. +func (m *Manager) ListCluster() error { + names, err := m.specManager.List() + if err != nil { + return perrs.AddStack(err) + } + + clusterTable := [][]string{ + // Header + {"Name", "User", "Version", "Path", "PrivateKey"}, + } + + for _, name := range names { + metadata, err := m.meta(name) + if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) && + !errors.Is(perrs.Cause(err), spec.ErrNoTiSparkMaster) { + return perrs.Trace(err) + } + + base := metadata.GetBaseMeta() + + clusterTable = append(clusterTable, []string{ + name, + base.User, + base.Version, + m.specManager.Path(name), + m.specManager.Path(name, "ssh", "id_rsa"), + }) + } + + cliutil.PrintTable(clusterTable, true) + return nil +} diff --git a/pkg/cluster/manager/manager.go b/pkg/cluster/manager/manager.go new file mode 100644 index 0000000000..724105ef77 --- /dev/null +++ b/pkg/cluster/manager/manager.go @@ -0,0 +1,136 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package manager + +import ( + "fmt" + "strings" + + "github.com/fatih/color" + "github.com/joomcode/errorx" + perrs "github.com/pingcap/errors" + "github.com/pingcap/tiup/pkg/cliutil" + operator "github.com/pingcap/tiup/pkg/cluster/operation" + "github.com/pingcap/tiup/pkg/cluster/spec" + "github.com/pingcap/tiup/pkg/cluster/task" + "github.com/pingcap/tiup/pkg/errutil" + "github.com/pingcap/tiup/pkg/logger/log" + "github.com/pingcap/tiup/pkg/set" + "github.com/pingcap/tiup/pkg/utils" +) + +var ( + errNSDeploy = errorx.NewNamespace("deploy") + errDeployNameDuplicate = errNSDeploy.NewType("name_dup", errutil.ErrTraitPreCheck) + + errNSRename = errorx.NewNamespace("rename") + errorRenameNameNotExist = errNSRename.NewType("name_not_exist", errutil.ErrTraitPreCheck) + errorRenameNameDuplicate = errNSRename.NewType("name_dup", errutil.ErrTraitPreCheck) +) + +// Manager to deploy a cluster. +type Manager struct { + sysName string + specManager *spec.SpecManager + bindVersion spec.BindVersion +} + +// NewManager create a Manager. 
+func NewManager(sysName string, specManager *spec.SpecManager, bindVersion spec.BindVersion) *Manager { + return &Manager{ + sysName: sysName, + specManager: specManager, + bindVersion: bindVersion, + } +} + +func (m *Manager) meta(name string) (metadata spec.Metadata, err error) { + exist, err := m.specManager.Exist(name) + if err != nil { + return nil, perrs.AddStack(err) + } + + if !exist { + return nil, perrs.Errorf("%s cluster `%s` not exists", m.sysName, name) + } + + metadata = m.specManager.NewMetadata() + err = m.specManager.Metadata(name, metadata) + if err != nil { + return metadata, perrs.AddStack(err) + } + + return metadata, nil +} + +func (m *Manager) confirmTopology(name, version string, topo spec.Topology, patchedRoles set.StringSet) error { + log.Infof("Please confirm your topology:") + + cyan := color.New(color.FgCyan, color.Bold) + fmt.Printf("Cluster type: %s\n", cyan.Sprint(m.sysName)) + fmt.Printf("Cluster name: %s\n", cyan.Sprint(name)) + fmt.Printf("Cluster version: %s\n", cyan.Sprint(version)) + if topo.BaseTopo().GlobalOptions.TLSEnabled { + fmt.Printf("TLS encryption: %s\n", cyan.Sprint("enabled")) + } + + clusterTable := [][]string{ + // Header + {"Type", "Host", "Ports", "OS/Arch", "Directories"}, + } + + topo.IterInstance(func(instance spec.Instance) { + comp := instance.ComponentName() + if patchedRoles.Exist(comp) { + comp += " (patched)" + } + clusterTable = append(clusterTable, []string{ + comp, + instance.GetHost(), + utils.JoinInt(instance.UsedPorts(), "/"), + cliutil.OsArch(instance.OS(), instance.Arch()), + strings.Join(instance.UsedDirs(), ","), + }) + }) + + cliutil.PrintTable(clusterTable, true) + + log.Warnf("Attention:") + log.Warnf(" 1. If the topology is not what you expected, check your yaml file.") + log.Warnf(" 2. Please confirm there is no port/directory conflicts in same host.") + if len(patchedRoles) != 0 { + log.Errorf(" 3. The component marked as `patched` has been replaced by previous patch commanm.") + } + + if spec, ok := topo.(*spec.Specification); ok { + if len(spec.TiSparkMasters) > 0 || len(spec.TiSparkWorkers) > 0 { + cyan := color.New(color.FgCyan, color.Bold) + msg := cyan.Sprint(`There are TiSpark nodes defined in the topology, please note that you'll need to manually install Java Runtime Environment (JRE) 8 on the host, otherwise the TiSpark nodes will fail to start. +You may read the OpenJDK doc for a reference: https://openjdk.java.net/install/ + `) + log.Warnf(msg) + } + } + + return cliutil.PromptForConfirmOrAbortError("Do you want to continue? [y/N]: ") +} + +func (m *Manager) sshTaskBuilder(name string, topo spec.Topology, user string, opts operator.Options) *task.Builder { + return task.NewBuilder(). + SSHKeySet( + m.specManager.Path(name, "ssh", "id_rsa"), + m.specManager.Path(name, "ssh", "id_rsa.pub"), + ). + ClusterSSH(topo, user, opts.SSHTimeout, opts.SSHType, topo.BaseTopo().GlobalOptions.SSHType) +} diff --git a/pkg/cluster/manager_test.go b/pkg/cluster/manager/manager_test.go similarity index 97% rename from pkg/cluster/manager_test.go rename to pkg/cluster/manager/manager_test.go index 13a36d41a5..87f045422a 100644 --- a/pkg/cluster/manager_test.go +++ b/pkg/cluster/manager/manager_test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package cluster +package manager import ( "testing" @@ -45,7 +45,7 @@ global: user: "test1" ssh_port: 220 deploy_dir: "test-deploy" - data_dir: "test-data" + data_dir: "test-data" tidb_servers: - host: 172.16.5.138 deploy_dir: "tidb-deploy" diff --git a/pkg/cluster/manager/patch.go b/pkg/cluster/manager/patch.go new file mode 100644 index 0000000000..265f78db56 --- /dev/null +++ b/pkg/cluster/manager/patch.go @@ -0,0 +1,174 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package manager + +import ( + "fmt" + "os" + "os/exec" + "path" + + "github.com/joomcode/errorx" + perrs "github.com/pingcap/errors" + "github.com/pingcap/tiup/pkg/cluster/clusterutil" + operator "github.com/pingcap/tiup/pkg/cluster/operation" + "github.com/pingcap/tiup/pkg/cluster/spec" + "github.com/pingcap/tiup/pkg/cluster/task" + "github.com/pingcap/tiup/pkg/set" + "github.com/pingcap/tiup/pkg/utils" +) + +// Patch the cluster. +func (m *Manager) Patch(name string, packagePath string, opt operator.Options, overwrite bool) error { + metadata, err := m.meta(name) + if err != nil { + return perrs.AddStack(err) + } + + topo := metadata.GetTopology() + base := metadata.GetBaseMeta() + + if exist := utils.IsExist(packagePath); !exist { + return perrs.New("specified package not exists") + } + + insts, err := instancesToPatch(topo, opt) + if err != nil { + return err + } + if err := checkPackage(m.bindVersion, m.specManager, name, insts[0].ComponentName(), insts[0].OS(), insts[0].Arch(), packagePath); err != nil { + return err + } + + var replacePackageTasks []task.Task + for _, inst := range insts { + deployDir := spec.Abs(base.User, inst.DeployDir()) + tb := task.NewBuilder() + tb.BackupComponent(inst.ComponentName(), base.Version, inst.GetHost(), deployDir). + InstallPackage(packagePath, inst.GetHost(), deployDir) + replacePackageTasks = append(replacePackageTasks, tb.Build()) + } + + tlsCfg, err := topo.TLSConfig(m.specManager.Path(name, spec.TLSCertKeyDir)) + if err != nil { + return err + } + t := m.sshTaskBuilder(name, topo, base.User, opt). + Parallel(false, replacePackageTasks...). + Func("UpgradeCluster", func(ctx *task.Context) error { + return operator.Upgrade(ctx, topo, opt, tlsCfg) + }). + Build() + + if err := t.Execute(task.NewContext()); err != nil { + if errorx.Cast(err) != nil { + // FIXME: Map possible task errors and give suggestions. 
+ return err + } + return perrs.Trace(err) + } + + if overwrite { + if err := overwritePatch(m.specManager, name, insts[0].ComponentName(), packagePath); err != nil { + return err + } + } + + return nil +} + +func checkPackage(bindVersion spec.BindVersion, specManager *spec.SpecManager, name, comp, nodeOS, arch, packagePath string) error { + metadata := specManager.NewMetadata() + if err := specManager.Metadata(name, metadata); err != nil { + return err + } + + ver := bindVersion(comp, metadata.GetBaseMeta().Version) + repo, err := clusterutil.NewRepository(nodeOS, arch) + if err != nil { + return err + } + entry, err := repo.ComponentBinEntry(comp, ver) + if err != nil { + return err + } + + checksum, err := utils.Checksum(packagePath) + if err != nil { + return err + } + cacheDir := specManager.Path(name, "cache", comp+"-"+checksum[:7]) + if err := os.MkdirAll(cacheDir, 0755); err != nil { + return err + } + if err := exec.Command("tar", "-xvf", packagePath, "-C", cacheDir).Run(); err != nil { + return err + } + + if exists := utils.IsExist(path.Join(cacheDir, entry)); !exists { + return fmt.Errorf("entry %s not found in package %s", entry, packagePath) + } + + return nil +} + +func overwritePatch(specManager *spec.SpecManager, name, comp, packagePath string) error { + if err := os.MkdirAll(specManager.Path(name, spec.PatchDirName), 0755); err != nil { + return err + } + + checksum, err := utils.Checksum(packagePath) + if err != nil { + return err + } + + tg := specManager.Path(name, spec.PatchDirName, comp+"-"+checksum[:7]+".tar.gz") + if !utils.IsExist(tg) { + if err := utils.Copy(packagePath, tg); err != nil { + return err + } + } + + symlink := specManager.Path(name, spec.PatchDirName, comp+".tar.gz") + if utils.IsSymExist(symlink) { + os.Remove(symlink) + } + return os.Symlink(tg, symlink) +} + +func instancesToPatch(topo spec.Topology, options operator.Options) ([]spec.Instance, error) { + roleFilter := set.NewStringSet(options.Roles...) + nodeFilter := set.NewStringSet(options.Nodes...) + components := topo.ComponentsByStartOrder() + components = operator.FilterComponent(components, roleFilter) + + instances := []spec.Instance{} + comps := []string{} + for _, com := range components { + insts := operator.FilterInstance(com.Instances(), nodeFilter) + if len(insts) > 0 { + comps = append(comps, com.Name()) + } + instances = append(instances, insts...) + } + if len(comps) > 1 { + return nil, fmt.Errorf("can't patch more than one component at once: %v", comps) + } + + if len(instances) == 0 { + return nil, fmt.Errorf("no instance found on specifid role(%v) and nodes(%v)", options.Roles, options.Nodes) + } + + return instances, nil +} diff --git a/pkg/cluster/manager/reload.go b/pkg/cluster/manager/reload.go new file mode 100644 index 0000000000..82a384df22 --- /dev/null +++ b/pkg/cluster/manager/reload.go @@ -0,0 +1,95 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
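
overwritePatch above stores the uploaded package under a checksum-suffixed name and points a `<component>.tar.gz` symlink at it, so applying a new patch only re-points the link. A sketch of that layout using only the standard library; the component and file names here are illustrative:

```go
package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
)

func main() {
	dir, err := ioutil.TempDir("", "patch-demo")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	// The package is stored under a checksum-suffixed name...
	target := filepath.Join(dir, "tidb-1a2b3c4.tar.gz") // illustrative component/checksum
	if err := ioutil.WriteFile(target, []byte("package bytes"), 0644); err != nil {
		panic(err)
	}

	// ...and <component>.tar.gz is just a symlink to it, replaced on each patch.
	link := filepath.Join(dir, "tidb.tar.gz")
	if _, err := os.Lstat(link); err == nil {
		os.Remove(link)
	}
	if err := os.Symlink(target, link); err != nil {
		panic(err)
	}

	resolved, _ := os.Readlink(link)
	fmt.Println(link, "->", resolved)
}
```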
+ +package manager + +import ( + "github.com/joomcode/errorx" + perrs "github.com/pingcap/errors" + operator "github.com/pingcap/tiup/pkg/cluster/operation" + "github.com/pingcap/tiup/pkg/cluster/spec" + "github.com/pingcap/tiup/pkg/cluster/task" + "github.com/pingcap/tiup/pkg/logger/log" +) + +// Reload the cluster. +func (m *Manager) Reload(name string, opt operator.Options, skipRestart bool) error { + sshTimeout := opt.SSHTimeout + + metadata, err := m.meta(name) + if err != nil { + return perrs.AddStack(err) + } + + topo := metadata.GetTopology() + base := metadata.GetBaseMeta() + + uniqueHosts := make(map[string]hostInfo) // host -> ssh-port, os, arch + topo.IterInstance(func(inst spec.Instance) { + if _, found := uniqueHosts[inst.GetHost()]; !found { + uniqueHosts[inst.GetHost()] = hostInfo{ + ssh: inst.GetSSHPort(), + os: inst.OS(), + arch: inst.Arch(), + } + } + }) + + refreshConfigTasks, hasImported := buildRegenConfigTasks(m, name, topo, base, nil) + monitorConfigTasks := buildRefreshMonitoredConfigTasks( + m.specManager, + name, + uniqueHosts, + *topo.BaseTopo().GlobalOptions, + topo.GetMonitoredOptions(), + sshTimeout, + opt.SSHType) + + // handle dir scheme changes + if hasImported { + if err := spec.HandleImportPathMigration(name); err != nil { + return perrs.AddStack(err) + } + } + + tb := m.sshTaskBuilder(name, topo, base.User, opt). + ParallelStep("+ Refresh instance configs", opt.Force, refreshConfigTasks...) + + if len(monitorConfigTasks) > 0 { + tb = tb.ParallelStep("+ Refresh monitor configs", opt.Force, monitorConfigTasks...) + } + + tlsCfg, err := topo.TLSConfig(m.specManager.Path(name, spec.TLSCertKeyDir)) + if err != nil { + return err + } + if !skipRestart { + tb = tb.Func("UpgradeCluster", func(ctx *task.Context) error { + return operator.Upgrade(ctx, topo, opt, tlsCfg) + }) + } + + t := tb.Build() + + if err := t.Execute(task.NewContext()); err != nil { + if errorx.Cast(err) != nil { + // FIXME: Map possible task errors and give suggestions. + return err + } + return perrs.Trace(err) + } + + log.Infof("Reloaded cluster `%s` successfully", name) + + return nil +} diff --git a/pkg/cluster/manager/rename.go b/pkg/cluster/manager/rename.go new file mode 100644 index 0000000000..54f1194f0d --- /dev/null +++ b/pkg/cluster/manager/rename.go @@ -0,0 +1,53 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package manager + +import ( + "os" + + perrs "github.com/pingcap/errors" + "github.com/pingcap/tiup/pkg/cliutil" + operator "github.com/pingcap/tiup/pkg/cluster/operation" + "github.com/pingcap/tiup/pkg/cluster/spec" + "github.com/pingcap/tiup/pkg/logger/log" + "github.com/pingcap/tiup/pkg/utils" +) + +// Rename the cluster +func (m *Manager) Rename(name string, opt operator.Options, newName string) error { + if !utils.IsExist(m.specManager.Path(name)) { + return errorRenameNameNotExist. + New("Cluster name '%s' not exist", name). 
+        WithProperty(cliutil.SuggestionFromFormat("Please double check your cluster name"))
+    }
+    if utils.IsExist(m.specManager.Path(newName)) {
+        return errorRenameNameDuplicate.
+            New("Cluster name '%s' is duplicated", newName).
+            WithProperty(cliutil.SuggestionFromFormat("Please specify another cluster name"))
+    }
+
+    _, err := m.meta(name)
+    if err != nil { // refuse renaming if current cluster topology is not valid
+        return perrs.AddStack(err)
+    }
+
+    if err := os.Rename(m.specManager.Path(name), m.specManager.Path(newName)); err != nil {
+        return perrs.AddStack(err)
+    }
+
+    log.Infof("Rename cluster `%s` -> `%s` successfully", name, newName)
+
+    opt.Roles = []string{spec.ComponentGrafana, spec.ComponentPrometheus}
+    return m.Reload(newName, opt, false)
+}
diff --git a/pkg/cluster/manager/scale_in.go b/pkg/cluster/manager/scale_in.go
new file mode 100644
index 0000000000..ebf155792b
--- /dev/null
+++ b/pkg/cluster/manager/scale_in.go
@@ -0,0 +1,109 @@
+// Copyright 2020 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package manager
+
+import (
+    "crypto/tls"
+    "errors"
+    "strings"
+
+    "github.com/fatih/color"
+    "github.com/joomcode/errorx"
+    perrs "github.com/pingcap/errors"
+    "github.com/pingcap/tiup/pkg/cliutil"
+    operator "github.com/pingcap/tiup/pkg/cluster/operation"
+    "github.com/pingcap/tiup/pkg/cluster/spec"
+    "github.com/pingcap/tiup/pkg/cluster/task"
+    "github.com/pingcap/tiup/pkg/logger/log"
+    "github.com/pingcap/tiup/pkg/meta"
+)
+
+// ScaleIn the cluster.
+func (m *Manager) ScaleIn(
+    name string,
+    skipConfirm bool,
+    gOpt operator.Options,
+    scale func(builder *task.Builder, metadata spec.Metadata, tlsCfg *tls.Config),
+) error {
+    var (
+        force bool     = gOpt.Force
+        nodes []string = gOpt.Nodes
+    )
+    if !skipConfirm {
+        if err := cliutil.PromptForConfirmOrAbortError(
+            "This operation will delete the %s nodes in `%s` and all their data.\nDo you want to continue? [y/N]:",
+            strings.Join(nodes, ","),
+            color.HiYellowString(name)); err != nil {
+            return err
+        }
+
+        if force {
+            if err := cliutil.PromptForConfirmOrAbortError(
+                "Forcing scale in is unsafe and may result in data loss for stateful components.\nDo you want to continue? [y/N]:",
+            ); err != nil {
+                return err
+            }
+        }
+
+        log.Infof("Scale-in nodes...")
+    }
+
+    metadata, err := m.meta(name)
+    if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) &&
+        !errors.Is(perrs.Cause(err), spec.ErrMultipleTiSparkMaster) &&
+        !errors.Is(perrs.Cause(err), spec.ErrMultipleTisparkWorker) {
+        // ignore conflict check errors, as the nodes may have been deployed by a
+        // former version that lacks some of these conflict checks
+        return perrs.AddStack(err)
+    }
+
+    topo := metadata.GetTopology()
+    base := metadata.GetBaseMeta()
+
+    // Regenerate configuration
+    regenConfigTasks, hasImported := buildRegenConfigTasks(m, name, topo, base, nodes)
+
+    // handle dir scheme changes
+    if hasImported {
+        if err := spec.HandleImportPathMigration(name); err != nil {
+            return err
+        }
+    }
+
+    tlsCfg, err := topo.TLSConfig(m.specManager.Path(name, spec.TLSCertKeyDir))
+    if err != nil {
+        return err
+    }
+
+    b := m.sshTaskBuilder(name, topo, base.User, gOpt)
+
+    scale(b, metadata, tlsCfg)
+
+    t := b.
+        ParallelStep("+ Refresh instance configs", force, regenConfigTasks...).
+        Parallel(force, buildDynReloadPromTasks(metadata.GetTopology())...).
+        Build()
+
+    if err := t.Execute(task.NewContext()); err != nil {
+        if errorx.Cast(err) != nil {
+            // FIXME: Map possible task errors and give suggestions.
+            return err
+        }
+        return perrs.Trace(err)
+    }
+
+    log.Infof("Scaled cluster `%s` in successfully", name)
+
+    return nil
+}
diff --git a/pkg/cluster/manager/scale_out.go b/pkg/cluster/manager/scale_out.go
new file mode 100644
index 0000000000..c57acdeb7d
--- /dev/null
+++ b/pkg/cluster/manager/scale_out.go
@@ -0,0 +1,169 @@
+// Copyright 2020 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package manager
+
+import (
+    "errors"
+    "time"
+
+    "github.com/joomcode/errorx"
+    perrs "github.com/pingcap/errors"
+    "github.com/pingcap/tiup/pkg/cliutil"
+    "github.com/pingcap/tiup/pkg/cluster/api"
+    "github.com/pingcap/tiup/pkg/cluster/executor"
+    operator "github.com/pingcap/tiup/pkg/cluster/operation"
+    "github.com/pingcap/tiup/pkg/cluster/spec"
+    "github.com/pingcap/tiup/pkg/cluster/task"
+    "github.com/pingcap/tiup/pkg/logger/log"
+    "github.com/pingcap/tiup/pkg/set"
+    "github.com/pingcap/tiup/pkg/utils"
+)
+
+// ScaleOutOptions contains the options for scale out.
+type ScaleOutOptions struct {
+    User           string // username to log in to the SSH server
+    SkipCreateUser bool   // don't create user
+    IdentityFile   string // path to the private key file
+    UsePassword    bool   // use password instead of identity file for ssh connection
+    NoLabels       bool   // don't check labels for TiKV instances
+}
+
+// ScaleOut scales out the cluster.
+func (m *Manager) ScaleOut(
+    name string,
+    topoFile string,
+    afterDeploy func(b *task.Builder, newPart spec.Topology),
+    final func(b *task.Builder, name string, meta spec.Metadata),
+    opt ScaleOutOptions,
+    skipConfirm bool,
+    gOpt operator.Options,
+) error {
+    metadata, err := m.meta(name)
+    // allow specific validation errors so that the user can recover a broken
+    // cluster if it is somehow in a bad state.
+    if err != nil &&
+        !errors.Is(perrs.Cause(err), spec.ErrNoTiSparkMaster) {
+        return perrs.AddStack(err)
+    }
+
+    topo := metadata.GetTopology()
+    base := metadata.GetBaseMeta()
+    // Inherit existing global configuration. We must assign the inherited values before unmarshalling
+    // because some default values rely on the global options and monitored options.
+    newPart := topo.NewPart()
+
+    // The "no TiSpark master" error is ignored: if the TiSpark master was removed from the topology
+    // file for some reason (a manual edit, for example), it is still possible to scale it out again
+    // to bring the whole topology back to a normal state.
+    if err := spec.ParseTopologyYaml(topoFile, newPart); err != nil &&
+        !errors.Is(perrs.Cause(err), spec.ErrNoTiSparkMaster) {
+        return err
+    }
+
+    if err := validateNewTopo(newPart); err != nil {
+        return err
+    }
+
+    // Abort the scale-out operation if the merged topology is invalid
+    mergedTopo := topo.MergeTopo(newPart)
+    if err := mergedTopo.Validate(); err != nil {
+        return err
+    }
+    spec.ExpandRelativeDir(mergedTopo)
+
+    if topo, ok := topo.(*spec.Specification); ok && !opt.NoLabels {
+        // Check if TiKV's labels are set correctly
+        pdList := topo.BaseTopo().MasterList
+        tlsCfg, err := topo.TLSConfig(m.specManager.Path(name, spec.TLSCertKeyDir))
+        if err != nil {
+            return err
+        }
+        pdClient := api.NewPDClient(pdList, 10*time.Second, tlsCfg)
+        lbs, err := pdClient.GetLocationLabels()
+        if err != nil {
+            return err
+        }
+        if err := spec.CheckTiKVLabels(lbs, mergedTopo.(*spec.Specification)); err != nil {
+            return perrs.Errorf("check TiKV labels failed, please fix them before continuing:\n%s", err)
+        }
+    }
+
+    clusterList, err := m.specManager.GetAllClusters()
+    if err != nil {
+        return err
+    }
+    if err := spec.CheckClusterPortConflict(clusterList, name, mergedTopo); err != nil {
+        return err
+    }
+    if err := spec.CheckClusterDirConflict(clusterList, name, mergedTopo); err != nil {
+        return err
+    }
+
+    patchedComponents := set.NewStringSet()
+    newPart.IterInstance(func(instance spec.Instance) {
+        if utils.IsExist(m.specManager.Path(name, spec.PatchDirName, instance.ComponentName()+".tar.gz")) {
+            patchedComponents.Insert(instance.ComponentName())
+        }
+    })
+
+    if !skipConfirm {
+        // patchedComponents are components that have been patched and overwritten
+        if err := m.confirmTopology(name, base.Version, newPart, patchedComponents); err != nil {
+            return err
+        }
+    }
+
+    var sshConnProps *cliutil.SSHConnectionProps = &cliutil.SSHConnectionProps{}
+    if gOpt.SSHType != executor.SSHTypeNone {
+        var err error
+        if sshConnProps, err = cliutil.ReadIdentityFileOrPassword(opt.IdentityFile, opt.UsePassword); err != nil {
+            return err
+        }
+    }
+
+    // Build the scale-out tasks
+    t, err := buildScaleOutTask(
+        m, name, metadata, mergedTopo, opt, sshConnProps, newPart,
+        patchedComponents, gOpt, afterDeploy, final)
+    if err != nil {
+        return err
+    }
+
+    if err := t.Execute(task.NewContext()); err != nil {
+        if errorx.Cast(err) != nil {
+            // FIXME: Map possible task errors and give suggestions.
+ return err + } + return perrs.Trace(err) + } + + log.Infof("Scaled cluster `%s` out successfully", name) + + return nil +} + +// validateNewTopo checks the new part of scale-out topology to make sure it's supported +func validateNewTopo(topo spec.Topology) (err error) { + topo.IterInstance(func(instance spec.Instance) { + // check for "imported" parameter, it can not be true when scaling out + if instance.IsImported() { + err = errors.New( + "'imported' is set to 'true' for new instance, this is only used " + + "for instances imported from tidb-ansible and make no sense when " + + "scaling out, please delete the line or set it to 'false' for new instances") + return + } + }) + return err +} diff --git a/pkg/cluster/manager/upgrade.go b/pkg/cluster/manager/upgrade.go new file mode 100644 index 0000000000..0f87980550 --- /dev/null +++ b/pkg/cluster/manager/upgrade.go @@ -0,0 +1,209 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package manager + +import ( + "fmt" + "os" + + "github.com/fatih/color" + "github.com/joomcode/errorx" + perrs "github.com/pingcap/errors" + "github.com/pingcap/tiup/pkg/cliutil" + operator "github.com/pingcap/tiup/pkg/cluster/operation" + "github.com/pingcap/tiup/pkg/cluster/spec" + "github.com/pingcap/tiup/pkg/cluster/task" + "github.com/pingcap/tiup/pkg/environment" + "github.com/pingcap/tiup/pkg/logger/log" + "github.com/pingcap/tiup/pkg/meta" + "github.com/pingcap/tiup/pkg/version" + "golang.org/x/mod/semver" +) + +// Upgrade the cluster. +func (m *Manager) Upgrade(name string, clusterVersion string, opt operator.Options, skipConfirm bool) error { + metadata, err := m.meta(name) + if err != nil { + return perrs.AddStack(err) + } + + topo := metadata.GetTopology() + base := metadata.GetBaseMeta() + + var ( + downloadCompTasks []task.Task // tasks which are used to download components + copyCompTasks []task.Task // tasks which are used to copy components to remote host + + uniqueComps = map[string]struct{}{} + ) + + if err := versionCompare(base.Version, clusterVersion); err != nil { + return err + } + + if !skipConfirm { + if err := cliutil.PromptForConfirmOrAbortError( + "This operation will upgrade %s %s cluster %s to %s.\nDo you want to continue? [y/N]:", + m.sysName, + color.HiYellowString(base.Version), + color.HiYellowString(name), + color.HiYellowString(clusterVersion)); err != nil { + return err + } + log.Infof("Upgrading cluster...") + } + + hasImported := false + for _, comp := range topo.ComponentsByUpdateOrder() { + for _, inst := range comp.Instances() { + compName := inst.ComponentName() + version := m.bindVersion(inst.ComponentName(), clusterVersion) + + // Download component from repository + key := fmt.Sprintf("%s-%s-%s-%s", compName, version, inst.OS(), inst.Arch()) + if _, found := uniqueComps[key]; !found { + uniqueComps[key] = struct{}{} + t := task.NewBuilder(). + Download(inst.ComponentName(), inst.OS(), inst.Arch(), version). 
+                    Build()
+                downloadCompTasks = append(downloadCompTasks, t)
+            }
+
+            deployDir := spec.Abs(base.User, inst.DeployDir())
+            // data dir would be empty for components which don't need it
+            dataDirs := spec.MultiDirAbs(base.User, inst.DataDir())
+            // log dir will always have a value, but might not be used by the component
+            logDir := spec.Abs(base.User, inst.LogDir())
+
+            // Deploy component
+            tb := task.NewBuilder()
+            if inst.IsImported() {
+                switch inst.ComponentName() {
+                case spec.ComponentPrometheus, spec.ComponentGrafana, spec.ComponentAlertmanager:
+                    tb.CopyComponent(
+                        inst.ComponentName(),
+                        inst.OS(),
+                        inst.Arch(),
+                        version,
+                        "", // use default srcPath
+                        inst.GetHost(),
+                        deployDir,
+                    )
+                }
+                hasImported = true
+            }
+
+            // back up files of the old version
+            tb = tb.BackupComponent(inst.ComponentName(), base.Version, inst.GetHost(), deployDir)
+
+            if deployerInstance, ok := inst.(DeployerInstance); ok {
+                deployerInstance.Deploy(tb, "", deployDir, version, name, clusterVersion)
+            } else {
+                // copy dependency component if needed
+                switch inst.ComponentName() {
+                case spec.ComponentTiSpark:
+                    env := environment.GlobalEnv()
+                    sparkVer, _, err := env.V1Repository().LatestStableVersion(spec.ComponentSpark, false)
+                    if err != nil {
+                        return err
+                    }
+                    tb = tb.DeploySpark(inst, sparkVer.String(), "" /* default srcPath */, deployDir)
+                default:
+                    tb = tb.CopyComponent(
+                        inst.ComponentName(),
+                        inst.OS(),
+                        inst.Arch(),
+                        version,
+                        "", // use default srcPath
+                        inst.GetHost(),
+                        deployDir,
+                    )
+                }
+            }
+
+            tb.InitConfig(
+                name,
+                clusterVersion,
+                m.specManager,
+                inst,
+                base.User,
+                opt.IgnoreConfigCheck,
+                meta.DirPaths{
+                    Deploy: deployDir,
+                    Data:   dataDirs,
+                    Log:    logDir,
+                    Cache:  m.specManager.Path(name, spec.TempConfigPath),
+                },
+            )
+            copyCompTasks = append(copyCompTasks, tb.Build())
+        }
+    }
+
+    // handle dir scheme changes
+    if hasImported {
+        if err := spec.HandleImportPathMigration(name); err != nil {
+            return err
+        }
+    }
+
+    tlsCfg, err := topo.TLSConfig(m.specManager.Path(name, spec.TLSCertKeyDir))
+    if err != nil {
+        return err
+    }
+    t := m.sshTaskBuilder(name, topo, base.User, opt).
+        Parallel(false, downloadCompTasks...).
+        Parallel(opt.Force, copyCompTasks...).
+        Func("UpgradeCluster", func(ctx *task.Context) error {
+            return operator.Upgrade(ctx, topo, opt, tlsCfg)
+        }).
+        Build()
+
+    if err := t.Execute(task.NewContext()); err != nil {
+        if errorx.Cast(err) != nil {
+            // FIXME: Map possible task errors and give suggestions.
+            return err
+        }
+        return perrs.Trace(err)
+    }
+
+    metadata.SetVersion(clusterVersion)
+
+    if err := m.specManager.SaveMeta(name, metadata); err != nil {
+        return perrs.Trace(err)
+    }
+
+    if err := os.RemoveAll(m.specManager.Path(name, "patch")); err != nil {
+        return perrs.Trace(err)
+    }
+
+    log.Infof("Upgraded cluster `%s` successfully", name)
+
+    return nil
+}
+
+func versionCompare(curVersion, newVersion string) error {
+    // Can always upgrade to 'nightly', even if the current version is 'nightly'
+    if newVersion == version.NightlyVersion {
+        return nil
+    }
+
+    switch semver.Compare(curVersion, newVersion) {
+    case -1:
+        return nil
+    case 0, 1:
+        return perrs.Errorf("please specify a higher version than %s", curVersion)
+    default:
+        return perrs.Errorf("unreachable")
+    }
+}
diff --git a/pkg/cluster/prepare.go b/pkg/cluster/prepare.go
deleted file mode 100644
index f0dfc04ca2..0000000000
--- a/pkg/cluster/prepare.go
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright 2020 PingCAP, Inc.
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package cluster - -import ( - "fmt" - - "github.com/pingcap/tiup/pkg/cluster/spec" - "github.com/pingcap/tiup/pkg/cluster/task" -) - -// InstanceIter to iterate instance. -type InstanceIter interface { - IterInstance(func(inst spec.Instance)) -} - -// BuildDownloadCompTasks build download component tasks -func BuildDownloadCompTasks(clusterVersion string, instanceIter InstanceIter, bindVersion spec.BindVersion) []*task.StepDisplay { - var tasks []*task.StepDisplay - uniqueTaskList := make(map[string]struct{}) // map["comp-os-arch"]{} - instanceIter.IterInstance(func(inst spec.Instance) { - key := fmt.Sprintf("%s-%s-%s", inst.ComponentName(), inst.OS(), inst.Arch()) - if _, found := uniqueTaskList[key]; !found { - uniqueTaskList[key] = struct{}{} - - // we don't set version for tispark, so the lastest tispark will be used - var version string - if inst.ComponentName() == spec.ComponentTiSpark { - // download spark as dependency of tispark - tasks = append(tasks, buildDownloadSparkTask(inst)) - } else { - version = bindVersion(inst.ComponentName(), clusterVersion) - } - - t := task.NewBuilder(). - Download(inst.ComponentName(), inst.OS(), inst.Arch(), version). - BuildAsStep(fmt.Sprintf(" - Download %s:%s (%s/%s)", - inst.ComponentName(), version, inst.OS(), inst.Arch())) - tasks = append(tasks, t) - } - }) - return tasks -} - -// buildDownloadSparkTask build download task for spark, which is a dependency of tispark -// FIXME: this is a hack and should be replaced by dependency handling in manifest processing -func buildDownloadSparkTask(inst spec.Instance) *task.StepDisplay { - return task.NewBuilder(). - Download(spec.ComponentSpark, inst.OS(), inst.Arch(), ""). 
- BuildAsStep(fmt.Sprintf(" - Download %s: (%s/%s)", - spec.ComponentSpark, inst.OS(), inst.Arch())) -} diff --git a/pkg/cluster/task/action.go b/pkg/cluster/task/action.go index e600998a7b..fd7bbdbce1 100644 --- a/pkg/cluster/task/action.go +++ b/pkg/cluster/task/action.go @@ -32,51 +32,49 @@ type ClusterOperate struct { // Execute implements the Task interface func (c *ClusterOperate) Execute(ctx *Context) error { + var ( + err error + printStatus bool = true + ) + var opErrMsg = [...]string{ + "failed to start", + "failed to stop", + "failed to restart", + "failed to destroy", + "failed to upgrade", + "failed to scale in", + "failed to scale out", + "failed to destroy tombstone", + } switch c.op { case operator.StartOperation: - err := operator.Start(ctx, c.spec, c.options, c.tlsCfg) - if err != nil { - return errors.Annotate(err, "failed to start") - } - operator.PrintClusterStatus(ctx, c.spec) + err = operator.Start(ctx, c.spec, c.options, c.tlsCfg) case operator.StopOperation: - err := operator.Stop(ctx, c.spec, c.options, c.tlsCfg) - if err != nil { - return errors.Annotate(err, "failed to stop") - } - operator.PrintClusterStatus(ctx, c.spec) + err = operator.Stop(ctx, c.spec, c.options, c.tlsCfg) case operator.RestartOperation: - err := operator.Restart(ctx, c.spec, c.options, c.tlsCfg) - if err != nil { - return errors.Annotate(err, "failed to restart") - } - operator.PrintClusterStatus(ctx, c.spec) - case operator.UpgradeOperation: - err := operator.Upgrade(ctx, c.spec, c.options, c.tlsCfg) - if err != nil { - return errors.Annotate(err, "failed to upgrade") - } - operator.PrintClusterStatus(ctx, c.spec) + err = operator.Restart(ctx, c.spec, c.options, c.tlsCfg) case operator.DestroyOperation: - err := operator.Destroy(ctx, c.spec, c.options) - if err != nil { - return errors.Annotate(err, "failed to destroy") - } - case operator.DestroyTombstoneOperation: - _, err := operator.DestroyTombstone(ctx, c.spec, false, c.options, c.tlsCfg) - if err != nil { - return errors.Annotate(err, "failed to destroy") - } - // print nothing + err = operator.Destroy(ctx, c.spec, c.options) + case operator.UpgradeOperation: + err = operator.Upgrade(ctx, c.spec, c.options, c.tlsCfg) case operator.ScaleInOperation: - err := operator.ScaleIn(ctx, c.spec, c.options, c.tlsCfg) - if err != nil { - return errors.Annotate(err, "failed to scale in") - } + err = operator.ScaleIn(ctx, c.spec, c.options, c.tlsCfg) + printStatus = false + case operator.DestroyTombstoneOperation: + _, err = operator.DestroyTombstone(ctx, c.spec, false, c.options, c.tlsCfg) + printStatus = false default: return errors.Errorf("nonsupport %s", c.op) } + if err != nil { + return errors.Annotatef(err, opErrMsg[c.op]) + } + + if printStatus { + operator.PrintClusterStatus(ctx, c.spec) + } + return nil } diff --git a/pkg/repository/mirror.go b/pkg/repository/mirror.go index 3451c1685e..1c43d13a6f 100644 --- a/pkg/repository/mirror.go +++ b/pkg/repository/mirror.go @@ -361,7 +361,7 @@ func (l *httpMirror) Rotate(m *v1manifest.Manifest) error { rotateAddr := fmt.Sprintf("%s/api/v1/rotate", l.Source()) data, err := json.Marshal(m) if err != nil { - return errors.Annotate(err, "marshal root manfiest") + return errors.Annotate(err, "marshal root manifest") } client := http.Client{Timeout: time.Minute}
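Reviewer note (not part of the diff): a minimal sketch of the upgrade guard added in pkg/cluster/manager/upgrade.go. versionCompare only permits moving to a strictly higher semver version, while the special 'nightly' target always passes. The file name and test below are hypothetical, and the last case assumes version.NightlyVersion is the literal "nightly"; it is only meant to illustrate the intended behaviour.

// upgrade_version_sketch_test.go -- hypothetical illustration, not included in this change
package manager

import "testing"

func TestVersionCompareSketch(t *testing.T) {
    // a strictly higher semver target is accepted
    if err := versionCompare("v4.0.0", "v4.0.1"); err != nil {
        t.Fatalf("expected v4.0.0 -> v4.0.1 to be allowed, got: %v", err)
    }
    // the same version or a downgrade is rejected with "please specify a higher version"
    if err := versionCompare("v4.0.1", "v4.0.1"); err == nil {
        t.Fatal("expected same-version upgrade to be rejected")
    }
    if err := versionCompare("v4.0.1", "v4.0.0"); err == nil {
        t.Fatal("expected downgrade to be rejected")
    }
    // 'nightly' bypasses the semver comparison entirely (assumes version.NightlyVersion == "nightly")
    if err := versionCompare("v4.0.1", "nightly"); err != nil {
        t.Fatalf("expected upgrade to nightly to be allowed, got: %v", err)
    }
}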