diff --git a/docs/DEBUGGING.md b/docs/DEBUGGING.md index a9c9796b99..7b1c3085f8 100644 --- a/docs/DEBUGGING.md +++ b/docs/DEBUGGING.md @@ -67,6 +67,11 @@ cp -r /containers/services/pillar/lower /persist/services/pillar # reboot and enjoy updates to the pillar container ``` +## pprof access + +pprof can be enabled with `eve http-debug`; zedbox then listens on port 6543, which can be forwarded, e.g., with edge-view. +Information on how to use pprof can be found here: <https://pkg.go.dev/net/http/pprof> + ## Keyboard/console access For security reasons the USB ports are disabled by default. The only exception is during hardware onboarding when an override file might be needed from a USB stick to do the initial network configuration as specified in in [DEVICE-CONNECTIVITY](DEVICE-CONNECTIVITY.md). During that onboarding USB keyboard access is currently also allowed. However, the USB access does not start until the pillar container is started. diff --git a/pkg/dom0-ztools/rootfs/bin/eve b/pkg/dom0-ztools/rootfs/bin/eve index bda66756c8..ff64c25b10 100755 --- a/pkg/dom0-ztools/rootfs/bin/eve +++ b/pkg/dom0-ztools/rootfs/bin/eve @@ -19,6 +19,9 @@ Welcome to EVE! 
persist attach config mount config unmount + http-debug + dump-stacks + dump-memory firewall drop verbose on|off version @@ -83,6 +86,33 @@ unmount_partlabel() { fi } +http_debug_request() { + URL="$1" + running=0 + + if nc -z 127.1 6543; then + running=1 + fi + + if [ "$running" = "0" ]; then + pkill -USR2 /opt/zededa/bin/zedbox + fi + + printf "POST %s HTTP/1.0\r\n\r\n" "$URL" | nc 127.1 6543 + + if [ "$running" = "0" ]; then + printf "POST /stop HTTP/1.0\r\n\r\n" | nc 127.1 6543 + fi +} + +dump_stacks() { + http_debug_request "/dump/stacks" +} + +dump_mem() { + http_debug_request "/dump/memory" +} + case "$1" in exec) # shellcheck disable=SC2086 ID=$(${CTR_CMD} t ls | awk '/^'${2:-pillar}' /{print $2;}' 2>/dev/null) @@ -136,6 +166,23 @@ __EOT__ help fi ;; + http-debug) if [ -z "$2" ] || [ "$2" = "start" ]; then + pkill -USR2 /opt/zededa/bin/zedbox + echo "Listening on :6543 -- use 'eve http-debug stop' to stop" + elif [ "$2" = "stop" ]; then + printf "POST /stop HTTP/1.0\r\n\r\n" | nc 127.1 6543 + else + echo "Unknown command; use 'start' or 'stop'" + fi + ;; + dump-stacks) + dump_stacks + echo "Your information can be found in logread" + ;; + dump-memory) + dump_mem + echo "Your information can be found in logread" + ;; verbose) # first lets find our piping process for PIPE in $(pgrep cat); do [ "$(readlink /proc/"$PIPE"/fd/0)" = /run/diag.pipe ] && break diff --git a/pkg/edgeview/src/basics.go b/pkg/edgeview/src/basics.go index f7235f9d29..de5005182e 100644 --- a/pkg/edgeview/src/basics.go +++ b/pkg/edgeview/src/basics.go @@ -132,6 +132,7 @@ func initOpts() { "techsupport", "top", "volume", + "pprof", } logdirectory = []string{ @@ -663,6 +664,8 @@ func printHelp(opt string) { helpExample("cat/ -line ", "display only of lines, like 'head' if is positive, like 'tail' if the is negative", false) case "datastore": helpOn("datastore", "display the device current datastore: EQDN, type, cipher information") + case "pprof": + helpOn("pprof", "pprof/on to turn on 
pprof; pprof/off to turn off again")
 	case "dmesg":
 		helpOn("dmesg", "display the device current dmesg information")
 	case "download":
diff --git a/pkg/edgeview/src/system.go b/pkg/edgeview/src/system.go
index fb6e859e87..577103a7b1 100644
--- a/pkg/edgeview/src/system.go
+++ b/pkg/edgeview/src/system.go
@@ -13,10 +13,12 @@ import (
 	"encoding/base64"
 	"encoding/json"
 	"encoding/pem"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"strconv"
 	"strings"
@@ -95,6 +97,8 @@ func runSystem(cmds cmdOpt, sysOpt string) {
 			getDmesg()
 		} else if strings.HasPrefix(opt, "tar/") {
 			getTarFile(opt)
+		} else if strings.HasPrefix(opt, "pprof") {
+			togglePprof(opt)
 		} else {
 			fmt.Printf("opt %s: not supported yet\n", opt)
 		}
@@ -982,6 +986,50 @@ func dispAFile(f os.FileInfo) {
 	fmt.Printf("%s, %v, %d, %s\n", f.Mode().String(), f.ModTime(), f.Size(), f.Name())
 }

+// togglePprof handles the "pprof/on" and "pprof/off" options by starting or
+// stopping the zedbox debug HTTP server; any other value prints the usage.
+func togglePprof(opt string) {
+	toggle := strings.SplitN(opt, "pprof/", 2)
+	if len(toggle) != 2 {
+		fmt.Printf("pprof needs to be either pprof/on or pprof/off\n")
+		return
+	}
+
+	switch toggle[1] {
+	case "on":
+		runPprof()
+	case "off":
+		stopPprof()
+	default:
+		// Report unknown values instead of silently doing nothing.
+		fmt.Printf("pprof needs to be either pprof/on or pprof/off\n")
+	}
+}
+
+// runPprof sends SIGUSR2 to zedbox via pkill, which makes zedbox start its
+// debug HTTP server.
+func runPprof() {
+	cmd := exec.Command("/usr/bin/pkill", "-USR2", "/opt/zededa/bin/zedbox")
+
+	if err := cmd.Run(); err != nil {
+		fmt.Printf("could not signal zedbox to run pprof: %v\n", err)
+	}
+}
+
+// stopPprof posts to /stop on localhost:6543 to shut the debug server down.
+func stopPprof() {
+	resp, err := http.Post("http://localhost:6543/stop", "", nil)
+	if err != nil {
+		// The server closes its connections while handling /stop, so an
+		// EOF here is expected and not reported.
+		if !errors.Is(err, io.EOF) {
+			fmt.Printf("could not stop pprof: %+v\n", err)
+		}
+		return
+	}
+	resp.Body.Close()
+}
+
 func getTarFile(opt string) {
 	execCmd := strings.SplitN(opt, "tar/", 2)
 	if len(execCmd) != 2 {
diff --git a/pkg/pillar/agentlog/agentlog.go b/pkg/pillar/agentlog/agentlog.go
index 2681591156..c5b5fe4f0f 100644
--- a/pkg/pillar/agentlog/agentlog.go
+++ b/pkg/pillar/agentlog/agentlog.go
@@ -5,6 +5,9 @@ package agentlog

 import (
 	"fmt"
+	"io"
+	"net/http"
+	"net/http/pprof"
 	"os"
 	"os/signal"
 	"runtime"
@@ -12,13 +15,14 @@ import (
 	"sort"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"syscall"
 	"time"
"github.com/lf-edge/eve/pkg/pillar/base" "github.com/lf-edge/eve/pkg/pillar/types" fileutils "github.com/lf-edge/eve/pkg/pillar/utils/file" - "github.com/satori/go.uuid" + uuid "github.com/satori/go.uuid" "github.com/sirupsen/logrus" ) @@ -149,58 +153,151 @@ func (hook *SkipCallerHook) Levels() []logrus.Level { // Wait on channel then handle the signals func handleSignals(log *base.LogObject, agentName string, agentPid int, sigs chan os.Signal) { - agentDebugDir := fmt.Sprintf("%s/%s/", types.PersistDebugDir, agentName) - sigUsr1FileName := agentDebugDir + "/sigusr1" - sigUsr2FileName := agentDebugDir + "/sigusr2" - for { select { case sig := <-sigs: log.Functionf("handleSignals: received %v\n", sig) switch sig { case syscall.SIGUSR1: - stacks := getStacks(true) - stackArray := strings.Split(stacks, "\n\n") - - sigUsr1File, err := os.OpenFile(sigUsr1FileName, - os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_TRUNC, 0755) - if err == nil { - for _, stack := range stackArray { - // This goes to /persist/agentdebug//sigusr1 file - sigUsr1File.WriteString(stack + "\n\n") - } - sigUsr1File.Close() - } else { - log.Errorf("handleSignals: Error opening file %s with: %s", sigUsr1FileName, err) - } - - usr1LogObject := base.EnsureLogObject(log, base.SigUSR1StacksType, - "", uuid.UUID{}, string(base.SigUSR1StacksType)) - if usr1LogObject != nil { - log.Warnf("SIGUSR1 triggered with %d stacks", len(stackArray)) - for _, stack := range stackArray { - usr1LogObject.Warnf("%v", stack) - } - log.Warnf("SIGUSR1: end of stacks") - } + dumpStacks(log, agentName) case syscall.SIGUSR2: - log.Warnf("SIGUSR2 triggered memory info:\n") - sigUsr2File, err := os.OpenFile(sigUsr2FileName, - os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_TRUNC, 0755) - if err != nil { - log.Errorf("handleSignals: Error opening file %s with: %s", sigUsr2FileName, err) - } else { - // This goes to /persist/agentdebug//sigusr2 file - sigUsr2File.WriteString("SIGUSR2 triggered memory info:\n") - } + go listenDebug(log, 
agentName) + } + } + } +} - logMemUsage(log, sigUsr2File) - logMemAllocationSites(log, sigUsr2File) - if sigUsr2File != nil { - sigUsr2File.Close() - } +func dumpMemoryInfo(log *base.LogObject, fileName string) { + log.Warnf("SIGUSR2 triggered memory info:\n") + sigUsr2File, err := os.OpenFile(fileName, + os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_TRUNC, 0755) + if err != nil { + log.Errorf("handleSignals: Error opening file %s with: %s", fileName, err) + } else { + // This goes to /persist/agentdebug//sigusr2 file + _, err := sigUsr2File.WriteString("SIGUSR2 triggered memory info:\n") + if err != nil { + log.Errorf("could not write to %s: %+v", fileName, err) + } + } + + logMemUsage(log, sigUsr2File) + logMemAllocationSites(log, sigUsr2File) + if sigUsr2File != nil { + sigUsr2File.Close() + } +} + +func dumpStacks(log *base.LogObject, fileName string) { + stacks := getStacks(true) + stackArray := strings.Split(stacks, "\n\n") + + sigUsr1File, err := os.OpenFile(fileName, + os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_TRUNC, 0755) + if err == nil { + for _, stack := range stackArray { + // This goes to /persist/agentdebug//sigusr1 file + _, err := sigUsr1File.WriteString(stack + "\n\n") + if err != nil { + log.Errorf("could not write to %s: %+v", fileName, err) } } + sigUsr1File.Close() + } else { + log.Errorf("handleSignals: Error opening file %s with: %s", fileName, err) + } + + usr1LogObject := base.EnsureLogObject(log, base.SigUSR1StacksType, + "", uuid.UUID{}, string(base.SigUSR1StacksType)) + if usr1LogObject != nil { + log.Warnf("SIGUSR1 triggered with %d stacks", len(stackArray)) + for _, stack := range stackArray { + usr1LogObject.Warnf("%v", stack) + } + log.Warnf("SIGUSR1: end of stacks") + } +} + +func writeOrLog(log *base.LogObject, w io.Writer, msg string) { + if _, err := w.Write([]byte(msg)); err != nil { + log.Errorf("Could not write to %+v: %+v", w, err) + } +} + +var listenDebugRunning atomic.Bool + +func listenDebug(log *base.LogObject, agentName string) 
{ + if listenDebugRunning.Swap(true) { + return + } + + agentDebugDir := fmt.Sprintf("%s/%s/", types.PersistDebugDir, agentName) + stacksDumpFileName := agentDebugDir + "/sigusr1" + memDumpFileName := agentDebugDir + "/sigusr2" + + mux := http.NewServeMux() + + server := &http.Server{ + Addr: "localhost:6543", + Handler: mux, + ReadHeaderTimeout: 10 * time.Second, + } + + info := ` + This server exposes the net/http/pprof API.
+ For examples on how to use it, see: https://pkg.go.dev/net/http/pprof
+ pprof methods

+ To create a flamegraph, do: go tool pprof -raw -output=cpu.txt 'http://localhost:6543/debug/pprof/profile?seconds=5';
+ stackcollapse-go.pl cpu.txt | flamegraph.pl --width 4096 > flame.svg
+ (both scripts can be found here) + ` + + mux.Handle("/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + writeOrLog(log, w, info) + })) + mux.Handle("/index.html", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + writeOrLog(log, w, info) + })) + + mux.Handle("/debug/pprof/", http.HandlerFunc(pprof.Index)) + mux.Handle("/debug/pprof/cmdline", http.HandlerFunc(pprof.Cmdline)) + mux.Handle("/debug/pprof/profile", http.HandlerFunc(pprof.Profile)) + mux.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol)) + mux.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace)) + mux.Handle("/stop", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodPost { + server.Close() + listenDebugRunning.Swap(false) + } else { + http.Error(w, "Did you want to use POST method?", http.StatusMethodNotAllowed) + return + } + })) + mux.Handle("/dump/stacks", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodPost { + dumpStacks(log, stacksDumpFileName) + response := fmt.Sprintf("Stacks can be found in logread or %s\n", stacksDumpFileName) + writeOrLog(log, w, response) + } else { + http.Error(w, "Did you want to use POST method?", http.StatusMethodNotAllowed) + return + } + })) + mux.Handle("/dump/memory", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodPost { + dumpMemoryInfo(log, memDumpFileName) + response := fmt.Sprintf("Stacks can be found in logread or %s\n", memDumpFileName) + writeOrLog(log, w, response) + } else { + http.Error(w, "Did you want to use POST method?", http.StatusMethodNotAllowed) + return + } + })) + + if err := server.ListenAndServe(); err != nil { + log.Errorf("Listening failed: %+v", err) } }