cluster: display memory usage (#1994)
kaaaaaaang authored Sep 22, 2022
1 parent 7e0cd81 commit c294184
Showing 9 changed files with 95 additions and 120 deletions.
25 changes: 13 additions & 12 deletions components/cluster/command/display.go
@@ -28,11 +28,11 @@ import (

func newDisplayCmd() *cobra.Command {
var (
clusterName string
showDashboardOnly bool
showVersionOnly bool
showTiKVLabels bool
statusTimeout uint64
dopt manager.DisplayOption
)
cmd := &cobra.Command{
Use: "display <cluster-name>",
@@ -43,20 +43,20 @@ func newDisplayCmd() *cobra.Command {
}

gOpt.APITimeout = statusTimeout
clusterName = args[0]
clusterReport.ID = scrubClusterName(clusterName)
teleCommand = append(teleCommand, scrubClusterName(clusterName))
dopt.ClusterName = args[0]
clusterReport.ID = scrubClusterName(dopt.ClusterName)
teleCommand = append(teleCommand, scrubClusterName(dopt.ClusterName))

exist, err := tidbSpec.Exist(clusterName)
exist, err := tidbSpec.Exist(dopt.ClusterName)
if err != nil {
return err
}

if !exist {
return perrs.Errorf("Cluster %s not found", clusterName)
return perrs.Errorf("Cluster %s not found", dopt.ClusterName)
}

metadata, err := spec.ClusterMetadata(clusterName)
metadata, err := spec.ClusterMetadata(dopt.ClusterName)
if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) &&
!errors.Is(perrs.Cause(err), spec.ErrNoTiSparkMaster) {
return err
@@ -68,16 +68,16 @@ func newDisplayCmd() *cobra.Command {
}

if showDashboardOnly {
tlsCfg, err := metadata.Topology.TLSConfig(tidbSpec.Path(clusterName, spec.TLSCertKeyDir))
tlsCfg, err := metadata.Topology.TLSConfig(tidbSpec.Path(dopt.ClusterName, spec.TLSCertKeyDir))
if err != nil {
return err
}
return cm.DisplayDashboardInfo(clusterName, time.Second*time.Duration(gOpt.APITimeout), tlsCfg)
return cm.DisplayDashboardInfo(dopt.ClusterName, time.Second*time.Duration(gOpt.APITimeout), tlsCfg)
}
if showTiKVLabels {
return cm.DisplayTiKVLabels(clusterName, gOpt)
return cm.DisplayTiKVLabels(dopt, gOpt)
}
return cm.Display(clusterName, gOpt)
return cm.Display(dopt, gOpt)
},
ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
switch len(args) {
@@ -91,10 +91,11 @@ func newDisplayCmd() *cobra.Command {

cmd.Flags().StringSliceVarP(&gOpt.Roles, "role", "R", nil, "Only display specified roles")
cmd.Flags().StringSliceVarP(&gOpt.Nodes, "node", "N", nil, "Only display specified nodes")
cmd.Flags().BoolVar(&gOpt.ShowUptime, "uptime", false, "Display with uptime")
cmd.Flags().BoolVar(&dopt.ShowUptime, "uptime", false, "Display with uptime")
cmd.Flags().BoolVar(&showDashboardOnly, "dashboard", false, "Only display TiDB Dashboard information")
cmd.Flags().BoolVar(&showVersionOnly, "version", false, "Only display TiDB cluster version")
cmd.Flags().BoolVar(&showTiKVLabels, "labels", false, "Only display labels of specified TiKV role or nodes")
cmd.Flags().BoolVar(&dopt.ShowProcess, "process", false, "display cpu and memory usage of nodes")
cmd.Flags().Uint64Var(&statusTimeout, "status-timeout", 10, "Timeout in seconds when getting node status")

return cmd
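The command-layer change above folds the loose clusterName and ShowUptime variables into a single manager.DisplayOption that the cobra flags write into directly, and adds the new --process switch. Below is a minimal, self-contained sketch of that wiring; the displayOption struct and the "prod-cluster" argument are illustrative stand-ins, while the flag names and fields mirror the diff. The real command hands the populated struct to cm.Display(dopt, gOpt).

```go
package main

import (
	"fmt"

	"github.com/spf13/cobra"
)

// displayOption stands in for manager.DisplayOption; the field and flag
// names follow the diff, the rest of the wiring is illustrative.
type displayOption struct {
	ClusterName string
	ShowUptime  bool
	ShowProcess bool
}

func main() {
	var dopt displayOption
	cmd := &cobra.Command{
		Use:  "display <cluster-name>",
		Args: cobra.ExactArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			dopt.ClusterName = args[0]
			// The real command would now call cm.Display(dopt, gOpt);
			// here we just print the collected options.
			fmt.Printf("%+v\n", dopt)
			return nil
		},
	}
	cmd.Flags().BoolVar(&dopt.ShowUptime, "uptime", false, "Display with uptime")
	cmd.Flags().BoolVar(&dopt.ShowProcess, "process", false, "display cpu and memory usage of nodes")

	cmd.SetArgs([]string{"prod-cluster", "--process"})
	_ = cmd.Execute() // prints {ClusterName:prod-cluster ShowUptime:false ShowProcess:true}
}
```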
10 changes: 5 additions & 5 deletions components/dm/command/display.go
@@ -27,7 +27,7 @@ import (

func newDisplayCmd() *cobra.Command {
var (
clusterName string
dopt manager.DisplayOption
showVersionOnly bool
statusTimeout uint64
)
@@ -40,18 +40,18 @@ func newDisplayCmd() *cobra.Command {
}

gOpt.APITimeout = statusTimeout
clusterName = args[0]
dopt.ClusterName = args[0]

if showVersionOnly {
metadata, err := spec.ClusterMetadata(clusterName)
metadata, err := spec.ClusterMetadata(dopt.ClusterName)
if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) {
return err
}
fmt.Println(metadata.Version)
return nil
}

return cm.Display(clusterName, gOpt)
return cm.Display(dopt, gOpt)
},
ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
switch len(args) {
@@ -66,7 +66,7 @@ func newDisplayCmd() *cobra.Command {
cmd.Flags().StringSliceVarP(&gOpt.Roles, "role", "R", nil, "Only display specified roles")
cmd.Flags().StringSliceVarP(&gOpt.Nodes, "node", "N", nil, "Only display specified nodes")
cmd.Flags().BoolVar(&showVersionOnly, "version", false, "Only display DM cluster version")
cmd.Flags().BoolVar(&gOpt.ShowUptime, "uptime", false, "Display DM with uptime")
cmd.Flags().BoolVar(&dopt.ShowUptime, "uptime", false, "Display DM with uptime")
cmd.Flags().Uint64Var(&statusTimeout, "status-timeout", 10, "Timeout in seconds when getting node status")

return cmd
83 changes: 53 additions & 30 deletions pkg/cluster/manager/display.go
@@ -45,17 +45,27 @@ import (
"go.uber.org/zap"
)

// DisplayOption represents option of display command
type DisplayOption struct {
ClusterName string
ShowUptime bool
ShowProcess bool
}

// InstInfo represents an instance info
type InstInfo struct {
ID string `json:"id"`
Role string `json:"role"`
Host string `json:"host"`
Ports string `json:"ports"`
OsArch string `json:"os_arch"`
Status string `json:"status"`
Since string `json:"since"`
DataDir string `json:"data_dir"`
DeployDir string `json:"deploy_dir"`
ID string `json:"id"`
Role string `json:"role"`
Host string `json:"host"`
Ports string `json:"ports"`
OsArch string `json:"os_arch"`
Status string `json:"status"`
Memory string `json:"memory"`
MemoryLimit string `json:"memory_limit"`
CPUquota string `json:"cpu_quota"`
Since string `json:"since"`
DataDir string `json:"data_dir"`
DeployDir string `json:"deploy_dir"`

ComponentName string
Port int
@@ -97,12 +107,13 @@ type JSONOutput struct {
}

// Display cluster meta and topology.
func (m *Manager) Display(name string, opt operator.Options) error {
func (m *Manager) Display(dopt DisplayOption, opt operator.Options) error {
name := dopt.ClusterName
if err := clusterutil.ValidateClusterNameOrError(name); err != nil {
return err
}

clusterInstInfos, err := m.GetClusterTopology(name, opt)
clusterInstInfos, err := m.GetClusterTopology(dopt, opt)
if err != nil {
return err
}
@@ -161,11 +172,15 @@ func (m *Manager) Display(name string, opt operator.Options) error {

// display topology
var clusterTable [][]string
if opt.ShowUptime {
clusterTable = append(clusterTable, []string{"ID", "Role", "Host", "Ports", "OS/Arch", "Status", "Since", "Data Dir", "Deploy Dir"})
} else {
clusterTable = append(clusterTable, []string{"ID", "Role", "Host", "Ports", "OS/Arch", "Status", "Data Dir", "Deploy Dir"})
rowHead := []string{"ID", "Role", "Host", "Ports", "OS/Arch", "Status"}
if dopt.ShowProcess {
rowHead = append(rowHead, "Memory", "Memory Limit", "CPU Quota")
}
if dopt.ShowUptime {
rowHead = append(rowHead, "Since")
}
rowHead = append(rowHead, "Data Dir", "Deploy Dir")
clusterTable = append(clusterTable, rowHead)

masterActive := make([]string, 0)
for _, v := range clusterInstInfos {
@@ -177,7 +192,10 @@ func (m *Manager) Display(name string, opt operator.Options) error {
v.OsArch,
formatInstanceStatus(v.Status),
}
if opt.ShowUptime {
if dopt.ShowProcess {
row = append(row, v.Memory, v.MemoryLimit, v.CPUquota)
}
if dopt.ShowUptime {
row = append(row, v.Since)
}
row = append(row, v.DataDir, v.DeployDir)
@@ -279,12 +297,13 @@ func getGrafanaURLStr(clusterInstInfos []InstInfo) (result string, exist bool) {
}

// DisplayTiKVLabels display cluster tikv labels
func (m *Manager) DisplayTiKVLabels(name string, opt operator.Options) error {
func (m *Manager) DisplayTiKVLabels(dopt DisplayOption, opt operator.Options) error {
name := dopt.ClusterName
if err := clusterutil.ValidateClusterNameOrError(name); err != nil {
return err
}

clusterInstInfos, err := m.GetClusterTopology(name, opt)
clusterInstInfos, err := m.GetClusterTopology(dopt, opt)
if err != nil {
return err
}
@@ -449,12 +468,13 @@ func (m *Manager) DisplayTiKVLabels(name string, opt operator.Options) error {
}

// GetClusterTopology get the topology of the cluster.
func (m *Manager) GetClusterTopology(name string, opt operator.Options) ([]InstInfo, error) {
func (m *Manager) GetClusterTopology(dopt DisplayOption, opt operator.Options) ([]InstInfo, error) {
ctx := ctxt.New(
context.Background(),
opt.Concurrency,
m.logger,
)
name := dopt.ClusterName
metadata, err := m.meta(name)
if err != nil && !errors.Is(perrs.Cause(err), meta.ErrValidate) &&
!errors.Is(perrs.Cause(err), spec.ErrNoTiSparkMaster) {
@@ -527,7 +547,7 @@ func (m *Manager) GetClusterTopology(name string, opt operator.Options) ([]InstI
dataDir = insDirs[1]
}

var status string
var status, memory string
switch ins.ComponentName() {
case spec.ComponentPD:
status = masterStatus[ins.ID()]
@@ -542,26 +562,25 @@ func (m *Manager) GetClusterTopology(name string, opt operator.Options) ([]InstI
}

since := "-"
if opt.ShowUptime {
if dopt.ShowUptime {
since = formatInstanceSince(ins.Uptime(ctx, statusTimeout, tlsCfg))
}

// Query the service status and uptime
if status == "-" || (opt.ShowUptime && since == "-") {
if status == "-" || (dopt.ShowUptime && since == "-") || dopt.ShowProcess {
e, found := ctxt.GetInner(ctx).GetExecutor(ins.GetHost())
if found {
var active string
nctx := checkpoint.NewContext(ctx)
active, _ := operator.GetServiceStatus(nctx, e, ins.ServiceName())
active, memory, _ = operator.GetServiceStatus(nctx, e, ins.ServiceName())
if status == "-" {
if parts := strings.Split(strings.TrimSpace(active), " "); len(parts) > 2 {
if parts[1] == "active" {
status = "Up"
} else {
status = parts[1]
}
if active == "active" {
status = "Up"
} else {
status = active
}
}
if opt.ShowUptime && since == "-" {
if dopt.ShowUptime && since == "-" {
since = formatInstanceSince(parseSystemctlSince(active))
}
}
@@ -572,6 +591,7 @@ func (m *Manager) GetClusterTopology(name string, opt operator.Options) ([]InstI
if ins.IsPatched() {
roleName += " (patched)"
}
rc := ins.ResourceControl()
mu.Lock()
clusterInstInfos = append(clusterInstInfos, InstInfo{
ID: ins.ID(),
@@ -580,6 +600,9 @@ func (m *Manager) GetClusterTopology(name string, opt operator.Options) ([]InstI
Ports: utils.JoinInt(ins.UsedPorts(), "/"),
OsArch: tui.OsArch(ins.OS(), ins.Arch()),
Status: status,
Memory: utils.Ternary(memory == "", "-", memory).(string),
MemoryLimit: utils.Ternary(rc.MemoryLimit == "", "-", rc.MemoryLimit).(string),
CPUquota: utils.Ternary(rc.CPUQuota == "", "-", rc.CPUQuota).(string),
DataDir: dataDir,
DeployDir: deployDir,
ComponentName: ins.ComponentName(),
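With DisplayOption threaded through, the manager assembles the table header and each row conditionally: the Memory, Memory Limit and CPU Quota columns are appended only when ShowProcess is set, Since only when ShowUptime is set, and utils.Ternary substitutes "-" for empty values. A minimal standalone sketch of that column assembly follows; the local displayOption type and the headerRow helper are illustrative copies, not the real manager API.

```go
package main

import (
	"fmt"
	"strings"
)

// displayOption is an illustrative copy of the fields the diff adds to
// manager.DisplayOption.
type displayOption struct {
	ShowUptime  bool
	ShowProcess bool
}

// headerRow mirrors the conditional header construction in Display:
// the base columns come first, then the optional process and uptime
// columns, then the directory columns.
func headerRow(dopt displayOption) []string {
	row := []string{"ID", "Role", "Host", "Ports", "OS/Arch", "Status"}
	if dopt.ShowProcess {
		row = append(row, "Memory", "Memory Limit", "CPU Quota")
	}
	if dopt.ShowUptime {
		row = append(row, "Since")
	}
	return append(row, "Data Dir", "Deploy Dir")
}

func main() {
	fmt.Println(strings.Join(headerRow(displayOption{ShowProcess: true}), " | "))
	// ID | Role | Host | Ports | OS/Arch | Status | Memory | Memory Limit | CPU Quota | Data Dir | Deploy Dir
}
```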
37 changes: 0 additions & 37 deletions pkg/cluster/operation/action.go
@@ -639,43 +639,6 @@ func StopComponent(ctx context.Context,
return errg.Wait()
}

// PrintClusterStatus print cluster status into the io.Writer.
func PrintClusterStatus(ctx context.Context, cluster *spec.Specification) (health bool) {
health = true
logger := ctx.Value(logprinter.ContextKeyLogger).(*logprinter.Logger)

for _, com := range cluster.ComponentsByStartOrder() {
if len(com.Instances()) == 0 {
continue
}

logger.Infof("Checking service state of %s", com.Name())
errg, _ := errgroup.WithContext(ctx)
for _, ins := range com.Instances() {
ins := ins

// the checkpoint part of context can't be shared between goroutines
// since it's used to trace the stack, so we must create a new layer
// of checkpoint context every time put it into a new goroutine.
nctx := checkpoint.NewContext(ctx)
errg.Go(func() error {
e := ctxt.GetInner(nctx).Get(ins.GetHost())
active, err := GetServiceStatus(nctx, e, ins.ServiceName())
if err != nil {
health = false
logger.Errorf("\t%s\t%v", ins.GetHost(), err)
} else {
logger.Infof("\t%s\t%s", ins.GetHost(), active)
}
return nil
})
}
_ = errg.Wait()
}

return
}

// toFailedActionError formats the errror msg for failed action
func toFailedActionError(err error, action string, host, service, logDir string) error {
return errors.Annotatef(err,
2 changes: 1 addition & 1 deletion pkg/cluster/operation/check.go
@@ -523,7 +523,7 @@ func CheckServices(ctx context.Context, e ctxt.Executor, host, service string, d
return result
}

active, err := GetServiceStatus(ctx, e, service+".service")
active, _, err := GetServiceStatus(ctx, e, service+".service")
if err != nil {
result.Err = err
}
3 changes: 0 additions & 3 deletions pkg/cluster/operation/operation.go
@@ -55,9 +55,6 @@ type Options struct {
RetainDataRoles []string
RetainDataNodes []string

// Show uptime or not
ShowUptime bool

DisplayMode string // the output format
Operation Operation
}
26 changes: 17 additions & 9 deletions pkg/cluster/operation/systemd.go
@@ -34,22 +34,30 @@ import (
Mar 09 13:56:19 ip-172-16-5-70 systemd[1]: Started drainer-8249 service.
*/
func GetServiceStatus(ctx context.Context, e ctxt.Executor, name string) (active string, err error) {
func GetServiceStatus(ctx context.Context, e ctxt.Executor, name string) (active, memory string, err error) {
c := module.SystemdModuleConfig{
Unit: name,
Action: "status",
}
systemd := module.NewSystemdModule(c)
stdout, _, err := systemd.Execute(ctx, e)

lines := strings.Split(string(stdout), "\n")
if len(lines) >= 3 {
return lines[2], nil
}

if err != nil {
return
}

return "", errors.Errorf("unexpected output: %s", string(stdout))
lines := strings.Split(string(stdout), "\n")
for _, line := range lines {
words := strings.Split(strings.TrimSpace(line), " ")
if len(words) >= 2 {
switch words[0] {
case "Active:":
active = words[1]
case "Memory:":
memory = words[1]
}
}
}
if active == "" {
err = errors.Errorf("unexpected output: %s", string(stdout))
}
return
}
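The rewritten GetServiceStatus no longer returns the third line of `systemctl status` verbatim; it scans every line and picks out the values that follow the Active: and Memory: keys, returning both so the display layer can show memory usage. A runnable sketch of that scan is below; the parseStatus helper and the sample status excerpt are illustrative, not captured from a real host.

```go
package main

import (
	"fmt"
	"strings"
)

// sample is an illustrative `systemctl status` excerpt, not output
// captured from a real host.
const sample = `tikv-20160.service - tikv service
   Loaded: loaded (/etc/systemd/system/tikv-20160.service; enabled)
   Active: active (running) since Mon 2022-09-19 10:00:00 CST; 2 days ago
 Main PID: 1234 (tikv-server)
   Memory: 2.3G
   CGroup: /system.slice/tikv-20160.service`

// parseStatus mirrors the field scan in the new GetServiceStatus: each
// line is trimmed and split on spaces, and the word after "Active:" or
// "Memory:" is kept.
func parseStatus(stdout string) (active, memory string) {
	for _, line := range strings.Split(stdout, "\n") {
		words := strings.Split(strings.TrimSpace(line), " ")
		if len(words) >= 2 {
			switch words[0] {
			case "Active:":
				active = words[1]
			case "Memory:":
				memory = words[1]
			}
		}
	}
	return
}

func main() {
	active, memory := parseStatus(sample)
	fmt.Printf("active=%q memory=%q\n", active, memory) // active="active" memory="2.3G"
}
```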