Skip to content

Commit

Permalink
feat(cmd/debuginfo) add new metrics to be collected (#7439) (#7562)
Browse files Browse the repository at this point in the history
- This PR adds 5 new metrics (/jemalloc, /state, /health, /debug/vars, /metrics) to be collected when running the `debuginfo` command.
- The flag `-p` has been renamed to `-m` to reflect this change. The new flag lets you pick one or more metrics/pprof profiles to be collected:
```
-m, --metrics strings    List of metrics & profile to dump in the report. (default [jemalloc,state,health,vars,metrics,heap,cpu_profile,trace,goroutine,threadcreate,block,mutex])
```
- The default value of the `-s` flag is now 30 seconds, since it takes 30s to collect a CPU profile.
- when saving the metric/profile - it will log the metric/profile name and the path where it's saving to.
- Added a new flag for the time-based profiles, i.e. the CPU and trace profiles:
```
-c, --cron_pprof strings   time-based pprof (default [cpu_profile,trace])
```

output of running the `debuginfo` command:
```
 dgraph debuginfo -s 30
I0305 21:01:44.730085   10587 run.go:126] using directory /tmp/dgraph-debuginfo041885197 for debug info dump.
I0305 21:01:44.730251   10587 debugging.go:74] fetching information over HTTP from http://localhost:8080/jemalloc
I0305 21:01:44.730261   10587 debugging.go:76] please wait... (30s)
I0305 21:01:44.731077   10587 debugging.go:51] saving jemalloc metric in /tmp/dgraph-debuginfo041885197/alpha_jemalloc.gz
I0305 21:01:44.731086   10587 debugging.go:74] fetching information over HTTP from http://localhost:8080/state
I0305 21:01:44.731092   10587 debugging.go:76] please wait... (30s)
I0305 21:01:44.731720   10587 debugging.go:51] saving state metric in /tmp/dgraph-debuginfo041885197/alpha_state.gz
I0305 21:01:44.731731   10587 debugging.go:74] fetching information over HTTP from http://localhost:8080/health
I0305 21:01:44.731736   10587 debugging.go:76] please wait... (30s)
I0305 21:01:44.732048   10587 debugging.go:51] saving health metric in /tmp/dgraph-debuginfo041885197/alpha_health.gz
I0305 21:01:44.732058   10587 debugging.go:74] fetching information over HTTP from http://localhost:8080/debug/vars
I0305 21:01:44.732065   10587 debugging.go:76] please wait... (30s)
I0305 21:01:44.732557   10587 debugging.go:51] saving vars metric in /tmp/dgraph-debuginfo041885197/alpha_vars.gz
I0305 21:01:44.732568   10587 debugging.go:74] fetching information over HTTP from http://localhost:8080/metrics
I0305 21:01:44.732573   10587 debugging.go:76] please wait... (30s)
I0305 21:01:44.734904   10587 debugging.go:51] saving metrics metric in /tmp/dgraph-debuginfo041885197/alpha_metrics.gz
I0305 21:01:44.734912   10587 debugging.go:74] fetching information over HTTP from http://localhost:8080/debug/pprof/heap
I0305 21:01:44.734917   10587 debugging.go:76] please wait... (30s)
I0305 21:01:44.738036   10587 debugging.go:51] saving heap metric in /tmp/dgraph-debuginfo041885197/alpha_heap.gz
I0305 21:01:44.738048   10587 debugging.go:74] fetching information over HTTP from http://localhost:8080/debug/pprof/goroutine?debug=2
I0305 21:01:44.738057   10587 debugging.go:76] please wait... (30s)
I0305 21:01:44.739136   10587 debugging.go:51] saving goroutine metric in /tmp/dgraph-debuginfo041885197/alpha_goroutine.gz
I0305 21:01:44.739145   10587 debugging.go:74] fetching information over HTTP from http://localhost:8080/debug/pprof/threadcreate
I0305 21:01:44.739151   10587 debugging.go:76] please wait... (30s)
I0305 21:01:44.740182   10587 debugging.go:51] saving threadcreate metric in /tmp/dgraph-debuginfo041885197/alpha_threadcreate.gz
I0305 21:01:44.740192   10587 debugging.go:74] fetching information over HTTP from http://localhost:8080/debug/pprof/block
I0305 21:01:44.740198   10587 debugging.go:76] please wait... (30s)
I0305 21:01:44.741154   10587 debugging.go:51] saving block metric in /tmp/dgraph-debuginfo041885197/alpha_block.gz
I0305 21:01:44.741163   10587 debugging.go:74] fetching information over HTTP from http://localhost:8080/debug/pprof/mutex
I0305 21:01:44.741169   10587 debugging.go:76] please wait... (30s)
I0305 21:01:44.742330   10587 debugging.go:51] saving mutex metric in /tmp/dgraph-debuginfo041885197/alpha_mutex.gz
I0305 21:01:44.742341   10587 debugging.go:74] fetching information over HTTP from http://localhost:8080/debug/pprof/profile?seconds=30s
I0305 21:01:44.742348   10587 debugging.go:76] please wait... (30s)
I0305 21:02:14.812492   10587 debugging.go:63] saving cpu_profile metric in /tmp/dgraph-debuginfo041885197/alpha_cpu_profile.gz
I0305 21:02:14.812570   10587 debugging.go:74] fetching information over HTTP from http://localhost:8080/debug/pprof/trace?seconds=30s
I0305 21:02:14.812596   10587 debugging.go:76] please wait... (30s)
I0305 21:02:15.816449   10587 debugging.go:63] saving trace metric in /tmp/dgraph-debuginfo041885197/alpha_trace.gz
I0305 21:02:15.842375   10587 run.go:159] Debuginfo archive successful: dgraph-debuginfo041885197.tar.gz

```

(cherry picked from commit 79ada0e)
  • Loading branch information
OmarAyo authored Mar 12, 2021
1 parent 4b8a1fd commit 12ae8f7
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 40 deletions.
40 changes: 18 additions & 22 deletions dgraph/cmd/debuginfo/pprof.go → dgraph/cmd/debuginfo/debugging.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,7 @@ import (
"github.com/golang/glog"
)

// pprofProfileTypes holds the names of the pprof profiles. It is used as the
// default value of the "profiles" flag, and each name is substituted into the
// /debug/pprof/<name> URL when the profile is fetched.
var pprofProfileTypes = []string{
	"goroutine", "heap", "threadcreate", "block", "mutex",
	"profile", "trace",
}

func saveProfiles(addr, pathPrefix string, duration time.Duration, profiles []string) {
func saveMetrics(addr, pathPrefix string, seconds uint32, metricTypes []string) {
u, err := url.Parse(addr)
if err != nil || (u.Host == "" && u.Scheme != "" && u.Scheme != "file") {
u, err = url.Parse("http://" + addr)
Expand All @@ -49,27 +39,33 @@ func saveProfiles(addr, pathPrefix string, duration time.Duration, profiles []st
return
}

for _, profileType := range profiles {
source := fmt.Sprintf("%s/debug/pprof/%s?duration=%d", u.String(),
profileType, int(duration.Seconds()))
savePath := fmt.Sprintf("%s%s.gz", pathPrefix, profileType)
duration := time.Duration(seconds) * time.Second

if err := saveProfile(source, savePath, duration); err != nil {
glog.Errorf("error while saving pprof profile from %s: %s", source, err)
for _, metricType := range metricTypes {
source := u.String() + metricMap[metricType]
switch metricType {
case "cpu":
source += fmt.Sprintf("%s%d", "?seconds=", seconds)
case "trace":
source += fmt.Sprintf("%s%d", "?seconds=", seconds)
}
savePath := fmt.Sprintf("%s%s.gz", pathPrefix, metricType)
if err := saveDebug(source, savePath, duration); err != nil {
glog.Errorf("error while saving metric from %s: %s", source, err)
continue
}

glog.Infof("saving %s profile in %s", profileType, savePath)
glog.Infof("saving %s metric in %s", metricType, savePath)
}
}

// saveProfile writes the profile specified in the argument fetching it from the host
// saveDebug writes the debug info specified in the argument fetching it from the host
// provided in the configuration
func saveProfile(sourceURL, filePath string, duration time.Duration) error {
func saveDebug(sourceURL, filePath string, duration time.Duration) error {
var err error
var resp io.ReadCloser

glog.Infof("fetching profile over HTTP from %s", sourceURL)
glog.Infof("fetching information over HTTP from %s", sourceURL)
if duration > 0 {
glog.Info(fmt.Sprintf("please wait... (%v)", duration))
}
Expand All @@ -83,7 +79,7 @@ func saveProfile(sourceURL, filePath string, duration time.Duration) error {
defer resp.Close()
out, err := os.Create(filePath)
if err != nil {
return fmt.Errorf("error while creating profile dump file: %s", err)
return fmt.Errorf("error while creating debug file: %s", err)
}
_, err = io.Copy(out, resp)
return err
Expand Down
69 changes: 51 additions & 18 deletions dgraph/cmd/debuginfo/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,28 +21,57 @@ import (
"io/ioutil"
"os"
"path/filepath"
"time"

"github.com/dgraph-io/dgraph/x"
"github.com/golang/glog"
"github.com/spf13/cobra"
)

type debugInfoCmdOpts struct {
alphaAddr string
zeroAddr string
archive bool
directory string
duration uint32

pprofProfiles []string
alphaAddr string
zeroAddr string
archive bool
directory string
seconds uint32
metricTypes []string
}

// DebugInfo is the sub-command whose Cmd, EnvPrefix and flags are set up in
// init.
var DebugInfo x.SubCommand

// debugInfoCmd stores the option values that the command-line flags are bound
// to; it starts out as the zero value of debugInfoCmdOpts.
var debugInfoCmd debugInfoCmdOpts

// metricMap associates every metric or profile name with the URL path that is
// appended to the node address in order to fetch it.
var metricMap = map[string]string{
	// Plain HTTP endpoints.
	"health":   "/health",
	"jemalloc": "/jemalloc",
	"metrics":  "/metrics",
	"state":    "/state",
	"vars":     "/debug/vars",

	// Snapshot pprof profiles.
	"block":        "/debug/pprof/block",
	"goroutine":    "/debug/pprof/goroutine?debug=2",
	"heap":         "/debug/pprof/heap",
	"mutex":        "/debug/pprof/mutex",
	"threadcreate": "/debug/pprof/threadcreate",

	// Time-based pprof profiles; saveMetrics appends "?seconds=<n>" to these.
	"cpu":   "/debug/pprof/profile",
	"trace": "/debug/pprof/trace",
}

// metricList is the default value of the "metrics" flag: the metrics and
// profiles that are collected when the user does not pick a subset. Every
// name is looked up in metricMap and must be listed only once: a repeated
// name is fetched a second time (waiting the full duration again for the
// time-based profiles) and overwrites the file saved for it earlier.
var metricList = []string{
	"heap",
	"cpu",
	"state",
	"health",
	"jemalloc",
	"trace",
	"metrics",
	"vars",
	"goroutine",
	"block",
	"mutex",
	"threadcreate",
}

func init() {
DebugInfo.Cmd = &cobra.Command{
Use: "debuginfo",
Expand All @@ -54,6 +83,7 @@ func init() {
}
},
}

DebugInfo.EnvPrefix = "DGRAPH_AGENT_DEBUGINFO"

flags := DebugInfo.Cmd.Flags()
Expand All @@ -64,10 +94,11 @@ func init() {
"Directory to write the debug info into.")
flags.BoolVarP(&debugInfoCmd.archive, "archive", "x", true,
"Whether to archive the generated report")
flags.Uint32VarP(&debugInfoCmd.duration, "seconds", "s", 15,
"Duration for time-based profile collection.")
flags.StringSliceVarP(&debugInfoCmd.pprofProfiles, "profiles", "p", pprofProfileTypes,
"List of pprof profiles to dump in the report.")
flags.Uint32VarP(&debugInfoCmd.seconds, "seconds", "s", 30,
"Duration for time-based metric collection.")
flags.StringSliceVarP(&debugInfoCmd.metricTypes, "metrics", "m", metricList,
"List of metrics & profile to dump in the report.")

}

func collectDebugInfo() (err error) {
Expand All @@ -84,25 +115,27 @@ func collectDebugInfo() (err error) {
}
glog.Infof("using directory %s for debug info dump.", debugInfoCmd.directory)

collectPProfProfiles()
collectDebug()

if debugInfoCmd.archive {
return archiveDebugInfo()
}
return nil
}

func collectPProfProfiles() {
duration := time.Duration(debugInfoCmd.duration) * time.Second

func collectDebug() {
if debugInfoCmd.alphaAddr != "" {
filePrefix := filepath.Join(debugInfoCmd.directory, "alpha_")
saveProfiles(debugInfoCmd.alphaAddr, filePrefix, duration, debugInfoCmd.pprofProfiles)

saveMetrics(debugInfoCmd.alphaAddr, filePrefix, debugInfoCmd.seconds, debugInfoCmd.metricTypes)

}

if debugInfoCmd.zeroAddr != "" {
filePrefix := filepath.Join(debugInfoCmd.directory, "zero_")
saveProfiles(debugInfoCmd.zeroAddr, filePrefix, duration, debugInfoCmd.pprofProfiles)

saveMetrics(debugInfoCmd.zeroAddr, filePrefix, debugInfoCmd.seconds, debugInfoCmd.metricTypes)

}
}

Expand Down

0 comments on commit 12ae8f7

Please sign in to comment.