From 107f477176a1a1b0239fe295fdb6b340e5232f57 Mon Sep 17 00:00:00 2001 From: danishprakash Date: Sun, 9 Jan 2022 18:55:29 +0530 Subject: [PATCH] command/operator_debug: add pprof interval Signed-off-by: danishprakash --- command/operator_debug.go | 55 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/command/operator_debug.go b/command/operator_debug.go index 446ddc03edab..914732220450 100644 --- a/command/operator_debug.go +++ b/command/operator_debug.go @@ -38,6 +38,7 @@ type OperatorDebugCommand struct { collectDir string duration time.Duration interval time.Duration + pprofInterval time.Duration pprofDuration time.Duration logLevel string maxNodes int @@ -166,6 +167,10 @@ Debug Options: The interval between snapshots of the Nomad state. Set interval equal to duration to capture a single snapshot. Defaults to 30s. + -pprof-interval= + The interval between pprof collections. Set interval equal to + duration to capture a single snapshot. Defaults to 30s. + -log-level= The log level to monitor. Defaults to DEBUG. 
@@ -334,7 +339,7 @@ func (c *OperatorDebugCommand) Run(args []string) int { flags := c.Meta.FlagSet(c.Name(), FlagSetClient) flags.Usage = func() { c.Ui.Output(c.Help()) } - var duration, interval, output, pprofDuration, eventTopic string + var duration, interval, pprofInterval, output, pprofDuration, eventTopic string var eventIndex int64 var nodeIDs, serverIDs string var allowStale bool @@ -350,7 +355,8 @@ func (c *OperatorDebugCommand) Run(args []string) int { flags.StringVar(&serverIDs, "server-id", "all", "") flags.BoolVar(&allowStale, "stale", false, "") flags.StringVar(&output, "output", "", "") - flags.StringVar(&pprofDuration, "pprof-duration", "1s", "") + flags.StringVar(&pprofDuration, "pprof-duration", "1m", "") + flags.StringVar(&pprofInterval, "pprof-interval", "30s", "") flags.BoolVar(&c.verbose, "verbose", false, "") c.consul = &external{tls: &api.TLSConfig{}} @@ -423,6 +429,20 @@ func (c *OperatorDebugCommand) Run(args []string) int { } c.index = uint64(eventIndex) + // Parse the pprof capture interval + pi, err := time.ParseDuration(pprofInterval) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error parsing pprof-interval: %s: %s", pprofInterval, err.Error())) + return 1 + } + c.pprofInterval = pi + + // Validate interval + if pi.Seconds() > pd.Seconds() { + c.Ui.Error(fmt.Sprintf("pprof-interval %s must be less than pprof-duration %s", pprofInterval, pprofDuration)) + return 1 + } + // Verify there are no extra arguments args = flags.Args() if l := len(args); l != 0 { @@ -595,6 +615,7 @@ func (c *OperatorDebugCommand) Run(args []string) int { } c.Ui.Output(fmt.Sprintf(" Interval: %s", interval)) c.Ui.Output(fmt.Sprintf(" Duration: %s", duration)) + c.Ui.Output(fmt.Sprintf(" Pprof Interval: %s", pprofInterval)) if c.pprofDuration.Seconds() != 1 { c.Ui.Output(fmt.Sprintf(" pprof Duration: %s", c.pprofDuration)) } @@ -663,7 +684,7 @@ func (c *OperatorDebugCommand) collect(client *api.Client) error { c.collectVault(clusterDir, vaultAddr) 
c.collectAgentHosts(client)
-	c.collectPprofs(client)
+	c.collectPeriodicPprofs(client)
 
 	c.collectPeriodic(client)
 
@@ -876,6 +897,50 @@ func (c *OperatorDebugCommand) collectAgentHost(path, id string, client *api.Cli
 	c.writeJSON(path, "agent-host.json", host, err)
 }
 
+// collectPeriodicPprofs captures pprof data once immediately and then again
+// every c.pprofInterval, until c.duration elapses or c.ctx is cancelled.
+//
+// It blocks for the whole capture window and calls c.cancel() once
+// c.duration has elapsed.
+// NOTE(review): assuming c.cancel cancels c.ctx, anything the caller runs
+// after this returns sees an already-cancelled context; confirm that is
+// intended, or run this in its own goroutine.
+func (c *OperatorDebugCommand) collectPeriodicPprofs(client *api.Client) {
+	deadline := time.NewTimer(c.duration)
+	defer deadline.Stop()
+
+	// time.NewTicker panics on a non-positive interval. A nil channel is
+	// never ready in a select, so in that case only the initial capture runs.
+	var tick <-chan time.Time
+	if c.pprofInterval > 0 {
+		ticker := time.NewTicker(c.pprofInterval)
+		defer ticker.Stop()
+		tick = ticker.C
+	}
+
+	// Capture once up front so the first profile does not wait an interval.
+	c.collectPprofs(client)
+
+	var pprofIntervalCount int
+
+	for {
+		select {
+		case <-deadline.C:
+			c.cancel()
+			return
+
+		case <-tick:
+			name := fmt.Sprintf("%04d", pprofIntervalCount)
+			c.Ui.Output(fmt.Sprintf(" Capture pprofInterval %s", name))
+			c.collectPprofs(client)
+			pprofIntervalCount++
+
+		case <-c.ctx.Done():
+			return
+		}
+	}
+}
+
 // collectPprofs captures the /agent/pprof for each listed node
 func (c *OperatorDebugCommand) collectPprofs(client *api.Client) {
 	for _, n := range c.nodeIDs {