Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Metrics gotemplate support, debug bundle features #9067

Merged
merged 14 commits into from
Oct 14, 2020
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions api/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@ require (
github.com/kr/pretty v0.1.0
github.com/mitchellh/go-testing-interface v1.0.0
github.com/stretchr/testify v1.5.1
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
gopkg.in/yaml.v2 v2.2.8 // indirect
)
2 changes: 2 additions & 0 deletions api/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
Expand Down
19 changes: 0 additions & 19 deletions api/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,22 +304,3 @@ func (op *Operator) LicenseGet(q *QueryOptions) (*LicenseReply, *QueryMeta, erro
}
return &reply, qm, nil
}

// Metrics returns the raw response body of the /v1/metrics endpoint as a
// slice of bytes. The payload is returned undecoded; its format (JSON or
// Prometheus) is selected by the caller through the query parameters in q.
// A nil q is treated as an empty set of query options.
func (op *Operator) Metrics(q *QueryOptions) ([]byte, error) {
	if q == nil {
		q = &QueryOptions{}
	}

	metricsReader, err := op.c.rawQuery("/v1/metrics", q)
	if err != nil {
		return nil, err
	}
	// Release the response body once it has been drained; without this the
	// underlying connection is leaked on every call.
	defer metricsReader.Close()

	metricsBytes, err := ioutil.ReadAll(metricsReader)
	if err != nil {
		return nil, err
	}

	return metricsBytes, nil
}
87 changes: 87 additions & 0 deletions api/operator_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package api

import (
"io/ioutil"
"time"
)

// MetricsSummary holds a roll-up of metrics info for a given interval.
// It is the decode target used by Operator.MetricsSummary for the
// /v1/metrics response.
type MetricsSummary struct {
// Timestamp is carried as a plain string rather than a time.Time.
// NOTE(review): presumably the start of the summarized interval — confirm
// against the agent's metrics endpoint.
Timestamp string
// Gauges lists the gauge values reported for the interval.
Gauges []GaugeValue
// Points lists the raw point series reported for the interval.
Points []PointValue
// Counters and Samples share the SampledValue representation.
Counters []SampledValue
Samples []SampledValue
}

// GaugeValue is a single named gauge reading together with its labels.
type GaugeValue struct {
Name string
// Hash is excluded from JSON encoding and decoding (tag "-").
Hash string `json:"-"`
Value float32

// Labels is excluded from JSON (tag "-"); the JSON "Labels" key is mapped
// to DisplayLabels instead, as a name-to-value map.
Labels []Label `json:"-"`
DisplayLabels map[string]string `json:"Labels"`
}

// PointValue is a named series of float32 points.
type PointValue struct {
Name string
Points []float32
}

// SampledValue is a named aggregate (used for both counters and samples in
// MetricsSummary) together with its derived statistics and labels.
type SampledValue struct {
Name string
// Hash is excluded from JSON encoding and decoding (tag "-").
Hash string `json:"-"`
// The embedded pointer promotes the AggregateSample fields (Count, Rate,
// Sum, Min, Max, ...) onto SampledValue. Because it is a pointer, it may be
// nil on a zero-value SampledValue; accessing promoted fields then panics.
*AggregateSample
Mean float64
Stddev float64

// Labels is excluded from JSON (tag "-"); the JSON "Labels" key is mapped
// to DisplayLabels instead, as a name-to-value map.
Labels []Label `json:"-"`
DisplayLabels map[string]string `json:"Labels"`
}

// AggregateSample is used to hold aggregate metrics
// about a sample. SumSq and LastUpdated carry the JSON tag "-" and are
// therefore never populated from, or written to, JSON.
type AggregateSample struct {
Count int // The count of emitted pairs
Rate float64 // The values rate per time unit (usually 1 second)
Sum float64 // The sum of values
SumSq float64 `json:"-"` // The sum of squared values
Min float64 // Minimum value
Max float64 // Maximum value
LastUpdated time.Time `json:"-"` // When value was last updated
}

// Label is a single name/value pair attached to a metric.
type Label struct {
Name string
Value string
}

// Metrics returns the raw response body of the /v1/metrics endpoint as a
// slice of bytes. The payload is returned undecoded; its format (JSON or
// Prometheus) is selected by the caller through the query parameters in q.
// A nil q is treated as an empty set of query options.
func (op *Operator) Metrics(q *QueryOptions) ([]byte, error) {
	if q == nil {
		q = &QueryOptions{}
	}

	metricsReader, err := op.c.rawQuery("/v1/metrics", q)
	if err != nil {
		return nil, err
	}
	// Release the response body once it has been drained; without this the
	// underlying connection is leaked on every call.
	defer metricsReader.Close()

	metricsBytes, err := ioutil.ReadAll(metricsReader)
	if err != nil {
		return nil, err
	}

	return metricsBytes, nil
}

// MetricsSummary queries /v1/metrics and decodes the response into a
// MetricsSummary. On success it returns the decoded summary together with the
// query metadata; on failure both are nil and the query error is returned
// unchanged.
func (op *Operator) MetricsSummary(q *QueryOptions) (*MetricsSummary, *QueryMeta, error) {
	// The decode target stays a nil pointer so the query layer decides
	// whether a summary is allocated at all.
	var summary *MetricsSummary

	meta, queryErr := op.c.query("/v1/metrics", &summary, q)
	if queryErr != nil {
		return nil, nil, queryErr
	}

	return summary, meta, nil
}
49 changes: 49 additions & 0 deletions api/operator_metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package api

import (
"testing"

"github.com/stretchr/testify/require"
)

// TestOperator_MetricsSummary starts a test agent, fetches the decoded
// metrics summary and checks that the populated fields look sane.
func TestOperator_MetricsSummary(t *testing.T) {
	t.Parallel()
	c, s := makeClient(t, nil, nil)
	defer s.Stop()

	operator := c.Operator()
	qo := &QueryOptions{
		Params: map[string]string{
			"pretty": "1",
		},
	}

	metrics, qm, err := operator.MetricsSummary(qo)
	require.NoError(t, err)
	require.NotNil(t, metrics)
	require.NotNil(t, qm)

	// Timestamp is a string, so NotNil could never fail; NotEmpty actually
	// verifies that a timestamp was returned.
	require.NotEmpty(t, metrics.Timestamp)

	// Points may legitimately be empty on a fresh agent, so nothing is
	// asserted about them (a ">= 0" length check is always true).
	require.GreaterOrEqual(t, len(metrics.Gauges), 1)   // should have at least 1 gauge
	require.GreaterOrEqual(t, len(metrics.Counters), 1) // should have at least 1 counter
	require.GreaterOrEqual(t, len(metrics.Samples), 1)  // should have at least 1 sample
}

// TestOperator_Metrics_Prometheus requests the raw metrics payload with
// format=prometheus and checks that it contains a Prometheus "# HELP" line.
func TestOperator_Metrics_Prometheus(t *testing.T) {
	t.Parallel()

	client, server := makeClient(t, nil, nil)
	defer server.Stop()

	opts := &QueryOptions{Params: map[string]string{"format": "prometheus"}}

	raw, err := client.Operator().Metrics(opts)
	require.NoError(t, err)
	require.NotNil(t, raw)

	body := string(raw)
	require.Containsf(t, body, "# HELP", "expected Prometheus format containing \"# HELP\", got: \n%s", body)
}
46 changes: 37 additions & 9 deletions command/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ Metrics Specific Options

-format <format>
Specify output format (prometheus)

-json
Output the allocation in its JSON format.

-t
Format and display allocation using a Go template.

`

return strings.TrimSpace(helpText)
Expand All @@ -42,19 +49,23 @@ func (c *OperatorMetricsCommand) AutocompleteFlags() complete.Flags {
complete.Flags{
"-pretty": complete.PredictAnything,
"-format": complete.PredictAnything,
"-json": complete.PredictNothing,
"-t": complete.PredictAnything,
})
}

func (c *OperatorMetricsCommand) Name() string { return "metrics" }

func (c *OperatorMetricsCommand) Run(args []string) int {
var pretty bool
var format string
var pretty, json bool
var format, tmpl string

flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
flags.Usage = func() { c.Ui.Output(c.Help()) }
flags.BoolVar(&pretty, "pretty", false, "")
flags.StringVar(&format, "format", "", "")
flags.BoolVar(&json, "json", false, "")
flags.StringVar(&tmpl, "t", "", "")

if err := flags.Parse(args); err != nil {
c.Ui.Error(fmt.Sprintf("Error parsing flags: %s", err))
Expand Down Expand Up @@ -88,14 +99,31 @@ func (c *OperatorMetricsCommand) Run(args []string) int {
Params: params,
}

bs, err := client.Operator().Metrics(query)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error getting metrics: %v", err))
return 1
if json || len(tmpl) > 0 {
davemay99 marked this conversation as resolved.
Show resolved Hide resolved
metrics, _, err := client.Operator().MetricsSummary(query)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error querying metrics: %v", err))
return 1
}

out, err := Format(json, tmpl, metrics)
if err != nil {
c.Ui.Error(err.Error())
return 1
}

c.Ui.Output(out)
return 0
} else {
bs, err := client.Operator().Metrics(query)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error getting metrics: %v", err))
return 1
}

resp := string(bs[:])
c.Ui.Output(resp)
}

resp := string(bs[:])
c.Ui.Output(resp)

return 0
}
14 changes: 14 additions & 0 deletions command/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,20 @@ func TestCommand_Metrics_Cases(t *testing.T) {
expectedOutput string
expectedError string
}{
{
"gotemplate MetricsSummary",
[]string{"-address=" + url, "-t", "'{{ .Timestamp }}'"},
0,
"UTC",
"",
},
{
"json formatted MetricsSummary",
[]string{"-address=" + url, "-json"},
0,
"{",
"",
},
{
"pretty print json",
[]string{"-address=" + url, "-pretty"},
Expand Down
74 changes: 64 additions & 10 deletions command/operator_debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ Debug Options:
profiles. Accepts id prefixes.

-server-id=<server>,<server>
Comma separated list of Nomad server names, or "leader" to monitor for logs and include pprof
Comma separated list of Nomad server names, "leader", or "all" to monitor for logs and include pprof
profiles.

-stale=<true|false>
Expand Down Expand Up @@ -251,9 +251,27 @@ func (c *OperatorDebugCommand) Run(args []string) int {
}
}

// Resolve server prefixes
for _, id := range argNodes(serverIDs) {
c.serverIDs = append(c.serverIDs, id)
// Resolve servers
members, err := client.Agent().Members()
c.writeJSON("version", "members.json", members, err)
davemay99 marked this conversation as resolved.
Show resolved Hide resolved
// We always write the error to the file, but don't range if no members found
if members != nil { // members
davemay99 marked this conversation as resolved.
Show resolved Hide resolved
if serverIDs == "all" {
// Special case to capture from all servers
for _, member := range members.Members {
c.serverIDs = append(c.serverIDs, member.Name)
}
} else {
for _, id := range argNodes(serverIDs) {
c.serverIDs = append(c.serverIDs, id)
}
}
}

// Return error if servers were specified but not found
if len(serverIDs) > 0 && len(c.serverIDs) == 0 {
c.Ui.Error(fmt.Sprintf("Failed to retrieve servers, 0 members found in list: %s", serverIDs))
return 1
}

c.manifest = make([]string, 0)
Expand All @@ -267,6 +285,8 @@ func (c *OperatorDebugCommand) Run(args []string) int {
stamped := "nomad-debug-" + c.timestamp

c.Ui.Output("Starting debugger and capturing cluster data...")
c.Ui.Output(fmt.Sprintf("Capturing from servers: %v", c.serverIDs))
c.Ui.Output(fmt.Sprintf("Capturing from client nodes: %v", c.nodeIDs))

c.Ui.Output(fmt.Sprintf(" Interval: '%s'", interval))
c.Ui.Output(fmt.Sprintf(" Duration: '%s'", duration))
Expand Down Expand Up @@ -499,6 +519,23 @@ func (c *OperatorDebugCommand) collectPprof(path, id string, client *api.Client)
if err == nil {
c.writeBytes(path, "goroutine.prof", bs)
}

// Gather goroutine text output - debug type 1
davemay99 marked this conversation as resolved.
Show resolved Hide resolved
// debug type 1 writes the legacy text format for human readable output
opts.Debug = 1
bs, err = client.Agent().Lookup("goroutine", opts, nil)
if err == nil {
c.writeBytes(path, "goroutine-debug1.txt", bs)
}

// Gather goroutine text output - debug type 2
davemay99 marked this conversation as resolved.
Show resolved Hide resolved
// When printing the "goroutine" profile, debug=2 means to print the goroutine
// stacks in the same form that a Go program uses when dying due to an unrecovered panic.
opts.Debug = 2
bs, err = client.Agent().Lookup("goroutine", opts, nil)
if err == nil {
c.writeBytes(path, "goroutine-debug2.txt", bs)
}
}

// collectPeriodic runs for duration, capturing the cluster state every interval. It flushes and stops
Expand Down Expand Up @@ -576,8 +613,13 @@ func (c *OperatorDebugCommand) collectNomad(dir string, client *api.Client) erro
vs, _, err := client.CSIVolumes().List(qo)
c.writeJSON(dir, "volumes.json", vs, err)

metrics, err := client.Operator().Metrics(qo)
c.writeJSON(dir, "metrics.json", metrics, err)
metricBytes, err := client.Operator().Metrics(qo)

if err != nil {
c.writeError(dir, "metrics.json", err)
} else {
c.writeBytes(dir, "metrics.json", metricBytes)
}
davemay99 marked this conversation as resolved.
Show resolved Hide resolved

return nil
}
Expand Down Expand Up @@ -628,12 +670,24 @@ func (c *OperatorDebugCommand) collectVault(dir, vault string) error {

// writeBytes writes a file to the archive, recording it in the manifest
func (c *OperatorDebugCommand) writeBytes(dir, file string, data []byte) error {
path := filepath.Join(dir, file)
c.manifest = append(c.manifest, path)
path = filepath.Join(c.collectDir, path)
relativePath := filepath.Join(dir, file)
c.manifest = append(c.manifest, relativePath)
dirPath := filepath.Join(c.collectDir, dir)
filePath := filepath.Join(dirPath, file)

// Ensure parent directories exist
err := os.MkdirAll(dirPath, os.ModePerm)
if err != nil {
// Display error immediately -- may not see this if files aren't written
c.Ui.Error(fmt.Sprintf("failed to create parent directories of \"%s\": %s", dirPath, err.Error()))
return err
}

fh, err := os.Create(path)
// Create the file
fh, err := os.Create(filePath)
if err != nil {
// Display error immediately -- may not see this if files aren't written
c.Ui.Error(fmt.Sprintf("failed to create file \"%s\": %s", filePath, err.Error()))
return err
}
defer fh.Close()
Expand Down
Loading