From 723ab2e14a231b1fa6741062b953d2654988b914 Mon Sep 17 00:00:00 2001 From: Christoph Ostarek Date: Fri, 19 May 2023 13:15:54 +0200 Subject: [PATCH 1/4] agentlog: add pprof debug http handler pprof show various insights into the process and can be accessed via http://localhost:6543/ to start it, send USR2 to zedbox or run `eve http-debug`; to stop it, do curl localhost:6543/stop -X POST or `eve http-debug stop` Signed-off-by: Christoph Ostarek --- docs/DEBUGGING.md | 5 + pkg/dom0-ztools/rootfs/bin/eve | 47 ++++++++ pkg/pillar/agentlog/agentlog.go | 183 ++++++++++++++++++++++++-------- 3 files changed, 192 insertions(+), 43 deletions(-) diff --git a/docs/DEBUGGING.md b/docs/DEBUGGING.md index a9c9796b99..7b1c3085f8 100644 --- a/docs/DEBUGGING.md +++ b/docs/DEBUGGING.md @@ -67,6 +67,11 @@ cp -r /containers/services/pillar/lower /persist/services/pillar # reboot and enjoy updates to the pillar container ``` +## pprof access + +pprof can be enabled with `eve http-debug`; now zedbox listens on port 6543 which can be forwarded f.e. with edge-view. +Information on how to use pprof can be found here: + ## Keyboard/console access For security reasons the USB ports are disabled by default. The only exception is during hardware onboarding when an override file might be needed from a USB stick to do the initial network configuration as specified in in [DEVICE-CONNECTIVITY](DEVICE-CONNECTIVITY.md). During that onboarding USB keyboard access is currently also allowed. However, the USB access does not start until the pillar container is started. diff --git a/pkg/dom0-ztools/rootfs/bin/eve b/pkg/dom0-ztools/rootfs/bin/eve index bda66756c8..01543ace38 100755 --- a/pkg/dom0-ztools/rootfs/bin/eve +++ b/pkg/dom0-ztools/rootfs/bin/eve @@ -19,6 +19,9 @@ Welcome to EVE! persist attach config mount config unmount + http-debug + dump-stacks + dump-memory firewall drop verbose on|off version @@ -83,6 +86,33 @@ unmount_partlabel() { fi } +http_debug_request() { + URL="$1" + running=0 + + if nc -z 127.1 6543; then + running=1 + fi + + if [ "$running" = "0" ]; then + pkill -USR2 /opt/zededa/bin/zedbox + fi + + wget -O - "$URL" --post-data '' + + if [ "$running" = "0" ]; then + wget -O - 127.1:6543/stop --post-data '' > /dev/null 2>&1 + fi +} + +dump_stacks() { + http_debug_request "127.1:6543/dump/stacks" +} + +dump_mem() { + http_debug_request "127.1:6543/dump/memory" +} + case "$1" in exec) # shellcheck disable=SC2086 ID=$(${CTR_CMD} t ls | awk '/^'${2:-pillar}' /{print $2;}' 2>/dev/null) @@ -136,6 +166,23 @@ __EOT__ help fi ;; + http-debug) if [ -z "$2" ] || [ "$2" = "start" ]; then + pkill -USR2 /opt/zededa/bin/zedbox + echo "Listening on :6543 -- use 'eve http-debug stop' to stop" + elif [ "$2" = "stop" ]; then + wget -O - 127.1:6543/stop --post-data '' > /dev/null 2>&1 + else + echo "Unknown command; use 'start' or 'stop'" + fi + ;; + dump-stacks) + dump_stacks + echo "Your information can be found in logread" + ;; + dump-memory) + dump_mem + echo "Your information can be found in logread" + ;; verbose) # first lets find our piping process for PIPE in $(pgrep cat); do [ "$(readlink /proc/"$PIPE"/fd/0)" = /run/diag.pipe ] && break diff --git a/pkg/pillar/agentlog/agentlog.go b/pkg/pillar/agentlog/agentlog.go index 2681591156..c5b5fe4f0f 100644 --- a/pkg/pillar/agentlog/agentlog.go +++ b/pkg/pillar/agentlog/agentlog.go @@ -5,6 +5,9 @@ package agentlog import ( "fmt" + "io" + "net/http" + "net/http/pprof" "os" "os/signal" "runtime" @@ -12,13 +15,14 @@ import ( "sort" "strings" "sync" + "sync/atomic" "syscall" "time" "github.com/lf-edge/eve/pkg/pillar/base" "github.com/lf-edge/eve/pkg/pillar/types" fileutils "github.com/lf-edge/eve/pkg/pillar/utils/file" - "github.com/satori/go.uuid" + uuid "github.com/satori/go.uuid" "github.com/sirupsen/logrus" ) @@ -149,58 +153,151 @@ func (hook *SkipCallerHook) Levels() []logrus.Level { // Wait on channel then handle the signals func handleSignals(log *base.LogObject, agentName string, agentPid int, sigs chan os.Signal) { - agentDebugDir := fmt.Sprintf("%s/%s/", types.PersistDebugDir, agentName) - sigUsr1FileName := agentDebugDir + "/sigusr1" - sigUsr2FileName := agentDebugDir + "/sigusr2" - for { select { case sig := <-sigs: log.Functionf("handleSignals: received %v\n", sig) switch sig { case syscall.SIGUSR1: - stacks := getStacks(true) - stackArray := strings.Split(stacks, "\n\n") - - sigUsr1File, err := os.OpenFile(sigUsr1FileName, - os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_TRUNC, 0755) - if err == nil { - for _, stack := range stackArray { - // This goes to /persist/agentdebug//sigusr1 file - sigUsr1File.WriteString(stack + "\n\n") - } - sigUsr1File.Close() - } else { - log.Errorf("handleSignals: Error opening file %s with: %s", sigUsr1FileName, err) - } - - usr1LogObject := base.EnsureLogObject(log, base.SigUSR1StacksType, - "", uuid.UUID{}, string(base.SigUSR1StacksType)) - if usr1LogObject != nil { - log.Warnf("SIGUSR1 triggered with %d stacks", len(stackArray)) - for _, stack := range stackArray { - usr1LogObject.Warnf("%v", stack) - } - log.Warnf("SIGUSR1: end of stacks") - } + dumpStacks(log, agentName) case syscall.SIGUSR2: - log.Warnf("SIGUSR2 triggered memory info:\n") - sigUsr2File, err := os.OpenFile(sigUsr2FileName, - os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_TRUNC, 0755) - if err != nil { - log.Errorf("handleSignals: Error opening file %s with: %s", sigUsr2FileName, err) - } else { - // This goes to /persist/agentdebug//sigusr2 file - sigUsr2File.WriteString("SIGUSR2 triggered memory info:\n") - } + go listenDebug(log, agentName) + } + } + } +} - logMemUsage(log, sigUsr2File) - logMemAllocationSites(log, sigUsr2File) - if sigUsr2File != nil { - sigUsr2File.Close() - } +func dumpMemoryInfo(log *base.LogObject, fileName string) { + log.Warnf("SIGUSR2 triggered memory info:\n") + sigUsr2File, err := os.OpenFile(fileName, + os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_TRUNC, 0755) + if err != nil { + log.Errorf("handleSignals: Error opening file %s with: %s", fileName, err) + } else { + // This goes to /persist/agentdebug//sigusr2 file + _, err := sigUsr2File.WriteString("SIGUSR2 triggered memory info:\n") + if err != nil { + log.Errorf("could not write to %s: %+v", fileName, err) + } + } + + logMemUsage(log, sigUsr2File) + logMemAllocationSites(log, sigUsr2File) + if sigUsr2File != nil { + sigUsr2File.Close() + } +} + +func dumpStacks(log *base.LogObject, fileName string) { + stacks := getStacks(true) + stackArray := strings.Split(stacks, "\n\n") + + sigUsr1File, err := os.OpenFile(fileName, + os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_TRUNC, 0755) + if err == nil { + for _, stack := range stackArray { + // This goes to /persist/agentdebug//sigusr1 file + _, err := sigUsr1File.WriteString(stack + "\n\n") + if err != nil { + log.Errorf("could not write to %s: %+v", fileName, err) } } + sigUsr1File.Close() + } else { + log.Errorf("handleSignals: Error opening file %s with: %s", fileName, err) + } + + usr1LogObject := base.EnsureLogObject(log, base.SigUSR1StacksType, + "", uuid.UUID{}, string(base.SigUSR1StacksType)) + if usr1LogObject != nil { + log.Warnf("SIGUSR1 triggered with %d stacks", len(stackArray)) + for _, stack := range stackArray { + usr1LogObject.Warnf("%v", stack) + } + log.Warnf("SIGUSR1: end of stacks") + } +} + +func writeOrLog(log *base.LogObject, w io.Writer, msg string) { + if _, err := w.Write([]byte(msg)); err != nil { + log.Errorf("Could not write to %+v: %+v", w, err) + } +} + +var listenDebugRunning atomic.Bool + +func listenDebug(log *base.LogObject, agentName string) { + if listenDebugRunning.Swap(true) { + return + } + + agentDebugDir := fmt.Sprintf("%s/%s/", types.PersistDebugDir, agentName) + stacksDumpFileName := agentDebugDir + "/sigusr1" + memDumpFileName := agentDebugDir + "/sigusr2" + + mux := http.NewServeMux() + + server := &http.Server{ + Addr: "localhost:6543", + Handler: mux, + ReadHeaderTimeout: 10 * time.Second, + } + + info := ` + This server exposes the net/http/pprof API.
+ For examples on how to use it, see: https://pkg.go.dev/net/http/pprof
+ pprof methods

+ To create a flamegraph, do: go tool pprof -raw -output=cpu.txt 'http://localhost:6543/debug/pprof/profile?seconds=5';
+ stackcollapse-go.pl cpu.txt | flamegraph.pl --width 4096 > flame.svg
+ (both scripts can be found here) + ` + + mux.Handle("/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + writeOrLog(log, w, info) + })) + mux.Handle("/index.html", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + writeOrLog(log, w, info) + })) + + mux.Handle("/debug/pprof/", http.HandlerFunc(pprof.Index)) + mux.Handle("/debug/pprof/cmdline", http.HandlerFunc(pprof.Cmdline)) + mux.Handle("/debug/pprof/profile", http.HandlerFunc(pprof.Profile)) + mux.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol)) + mux.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace)) + mux.Handle("/stop", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodPost { + server.Close() + listenDebugRunning.Swap(false) + } else { + http.Error(w, "Did you want to use POST method?", http.StatusMethodNotAllowed) + return + } + })) + mux.Handle("/dump/stacks", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodPost { + dumpStacks(log, stacksDumpFileName) + response := fmt.Sprintf("Stacks can be found in logread or %s\n", stacksDumpFileName) + writeOrLog(log, w, response) + } else { + http.Error(w, "Did you want to use POST method?", http.StatusMethodNotAllowed) + return + } + })) + mux.Handle("/dump/memory", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodPost { + dumpMemoryInfo(log, memDumpFileName) + response := fmt.Sprintf("Stacks can be found in logread or %s\n", memDumpFileName) + writeOrLog(log, w, response) + } else { + http.Error(w, "Did you want to use POST method?", http.StatusMethodNotAllowed) + return + } + })) + + if err := server.ListenAndServe(); err != nil { + log.Errorf("Listening failed: %+v", err) } } From a71accf94e859e02aa8df038e45456bddda03ff5 Mon Sep 17 00:00:00 2001 From: Christoph Ostarek Date: Tue, 30 May 2023 17:37:09 +0200 Subject: [PATCH 2/4] edgeview: allow pprof access this first starts the pprof interface on zedbox and then forwards the port to the edgeview client once the user is done the pprof interface is closed again Signed-off-by: Christoph Ostarek --- pkg/edgeview/src/basics.go | 1 + pkg/edgeview/src/edge-view.go | 7 +++++++ pkg/edgeview/src/network.go | 2 ++ pkg/edgeview/src/system.go | 17 +++++++++++++++++ 4 files changed, 27 insertions(+) diff --git a/pkg/edgeview/src/basics.go b/pkg/edgeview/src/basics.go index f7235f9d29..32c8f15d8a 100644 --- a/pkg/edgeview/src/basics.go +++ b/pkg/edgeview/src/basics.go @@ -75,6 +75,7 @@ func initOpts() { "tcpdump", "trace", "url", + "pprof", "wireless"} pubsubopts = []string{ diff --git a/pkg/edgeview/src/edge-view.go b/pkg/edgeview/src/edge-view.go index eba6030f31..c2774c6d47 100644 --- a/pkg/edgeview/src/edge-view.go +++ b/pkg/edgeview/src/edge-view.go @@ -202,6 +202,13 @@ func main() { pnetopt = pqueryopt } else if strings.HasPrefix(pqueryopt, "app") { psysopt = pqueryopt + } else if pqueryopt == "pprof" { + var ok bool + ok, tcpclientCnt, remotePorts = processTCPcmd("tcp/localhost:6543", remotePorts) + if !ok { + return + } + pnetopt = pqueryopt } else if strings.HasPrefix(pqueryopt, "tcp/") { var ok bool ok, tcpclientCnt, remotePorts = processTCPcmd(pqueryopt, remotePorts) diff --git a/pkg/edgeview/src/network.go b/pkg/edgeview/src/network.go index 03486a2eb4..2544a63338 100644 --- a/pkg/edgeview/src/network.go +++ b/pkg/edgeview/src/network.go @@ -121,6 +121,8 @@ func runNetwork(netw string) { runmDNS(substring) } else if opt == "tcp" { // tcp and proxy are special setAndStartProxyTCP(substring) + } else if opt == "pprof" { + runPprof() } else if opt == "showcerts" { getPeerCerts(substring) } else if opt == "addhost" { diff --git a/pkg/edgeview/src/system.go b/pkg/edgeview/src/system.go index fb6e859e87..75f678d1e3 100644 --- a/pkg/edgeview/src/system.go +++ b/pkg/edgeview/src/system.go @@ -17,6 +17,7 @@ import ( "io" "net/http" "os" + "os/exec" "path/filepath" "strconv" "strings" @@ -982,6 +983,22 @@ func dispAFile(f os.FileInfo) { fmt.Printf("%s, %v, %d, %s\n", f.Mode().String(), f.ModTime(), f.Size(), f.Name()) } +func runPprof() { + cmd := exec.Command("/usr/bin/pkill", "-USR2", "/opt/zededa/bin/zedbox") + + err := cmd.Run() + if err != nil { + fmt.Printf("could not signal zedbox to run pprof") + } + + setAndStartProxyTCP("localhost:6543") + + _, err = http.Post("http://localhost:6543/stop", "", nil) + if err != nil { + fmt.Printf("could not stop pprof: %+v\n", err) + } +} + func getTarFile(opt string) { execCmd := strings.SplitN(opt, "tar/", 2) if len(execCmd) != 2 { From bc2eb80a484558baf51675c8d636ad4628f29861 Mon Sep 17 00:00:00 2001 From: Christoph Ostarek Date: Wed, 7 Jun 2023 13:59:28 +0200 Subject: [PATCH 3/4] eve tool: replace wget with nc in order to keep the image small and reduce the attack surface wget may be removed in the future, so instead use nc Signed-off-by: Christoph Ostarek --- pkg/dom0-ztools/rootfs/bin/eve | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/dom0-ztools/rootfs/bin/eve b/pkg/dom0-ztools/rootfs/bin/eve index 01543ace38..ff64c25b10 100755 --- a/pkg/dom0-ztools/rootfs/bin/eve +++ b/pkg/dom0-ztools/rootfs/bin/eve @@ -98,19 +98,19 @@ http_debug_request() { pkill -USR2 /opt/zededa/bin/zedbox fi - wget -O - "$URL" --post-data '' + printf "POST %s HTTP/1.0\r\n\r\n" "$URL" | nc 127.1 6543 if [ "$running" = "0" ]; then - wget -O - 127.1:6543/stop --post-data '' > /dev/null 2>&1 + printf "POST /stop HTTP/1.0\r\n\r\n" | nc 127.1 6543 fi } dump_stacks() { - http_debug_request "127.1:6543/dump/stacks" + http_debug_request "/dump/stacks" } dump_mem() { - http_debug_request "127.1:6543/dump/memory" + http_debug_request "/dump/memory" } case "$1" in @@ -170,7 +170,7 @@ __EOT__ pkill -USR2 /opt/zededa/bin/zedbox echo "Listening on :6543 -- use 'eve http-debug stop' to stop" elif [ "$2" = "stop" ]; then - wget -O - 127.1:6543/stop --post-data '' > /dev/null 2>&1 + printf "POST /stop HTTP/1.0\r\n\r\n" | nc 127.1 6543 else echo "Unknown command; use 'start' or 'stop'" fi From 235d4f6502a42c6fdf9b9df837667e8973087a0d Mon Sep 17 00:00:00 2001 From: Christoph Ostarek Date: Wed, 7 Jun 2023 14:46:11 +0200 Subject: [PATCH 4/4] edgeview: do not forward tcp for pprof let the user of edgeview do it manually Signed-off-by: Christoph Ostarek --- pkg/edgeview/src/basics.go | 4 +++- pkg/edgeview/src/edge-view.go | 7 ------- pkg/edgeview/src/network.go | 2 -- pkg/edgeview/src/system.go | 26 ++++++++++++++++++++++---- 4 files changed, 25 insertions(+), 14 deletions(-) diff --git a/pkg/edgeview/src/basics.go b/pkg/edgeview/src/basics.go index 32c8f15d8a..de5005182e 100644 --- a/pkg/edgeview/src/basics.go +++ b/pkg/edgeview/src/basics.go @@ -75,7 +75,6 @@ func initOpts() { "tcpdump", "trace", "url", - "pprof", "wireless"} pubsubopts = []string{ @@ -133,6 +132,7 @@ func initOpts() { "techsupport", "top", "volume", + "pprof", } logdirectory = []string{ @@ -664,6 +664,8 @@ func printHelp(opt string) { helpExample("cat/ -line ", "display only of lines, like 'head' if is positive, like 'tail' if the is negative", false) case "datastore": helpOn("datastore", "display the device current datastore: EQDN, type, cipher information") + case "pprof": + helpOn("pprof", "pprof/on to turn on pprof; pprof/off to turn off again") case "dmesg": helpOn("dmesg", "display the device current dmesg information") case "download": diff --git a/pkg/edgeview/src/edge-view.go b/pkg/edgeview/src/edge-view.go index c2774c6d47..eba6030f31 100644 --- a/pkg/edgeview/src/edge-view.go +++ b/pkg/edgeview/src/edge-view.go @@ -202,13 +202,6 @@ func main() { pnetopt = pqueryopt } else if strings.HasPrefix(pqueryopt, "app") { psysopt = pqueryopt - } else if pqueryopt == "pprof" { - var ok bool - ok, tcpclientCnt, remotePorts = processTCPcmd("tcp/localhost:6543", remotePorts) - if !ok { - return - } - pnetopt = pqueryopt } else if strings.HasPrefix(pqueryopt, "tcp/") { var ok bool ok, tcpclientCnt, remotePorts = processTCPcmd(pqueryopt, remotePorts) diff --git a/pkg/edgeview/src/network.go b/pkg/edgeview/src/network.go index 2544a63338..03486a2eb4 100644 --- a/pkg/edgeview/src/network.go +++ b/pkg/edgeview/src/network.go @@ -121,8 +121,6 @@ func runNetwork(netw string) { runmDNS(substring) } else if opt == "tcp" { // tcp and proxy are special setAndStartProxyTCP(substring) - } else if opt == "pprof" { - runPprof() } else if opt == "showcerts" { getPeerCerts(substring) } else if opt == "addhost" { diff --git a/pkg/edgeview/src/system.go b/pkg/edgeview/src/system.go index 75f678d1e3..577103a7b1 100644 --- a/pkg/edgeview/src/system.go +++ b/pkg/edgeview/src/system.go @@ -13,6 +13,7 @@ import ( "encoding/base64" "encoding/json" "encoding/pem" + "errors" "fmt" "io" "net/http" @@ -96,6 +97,8 @@ func runSystem(cmds cmdOpt, sysOpt string) { getDmesg() } else if strings.HasPrefix(opt, "tar/") { getTarFile(opt) + } else if strings.HasPrefix(opt, "pprof") { + togglePprof(opt) } else { fmt.Printf("opt %s: not supported yet\n", opt) } @@ -983,6 +986,21 @@ func dispAFile(f os.FileInfo) { fmt.Printf("%s, %v, %d, %s\n", f.Mode().String(), f.ModTime(), f.Size(), f.Name()) } +func togglePprof(opt string) { + toggle := strings.SplitN(opt, "pprof/", 2) + if len(toggle) != 2 { + fmt.Printf("pprof needs to be either pprof/on or pprof/off\n") + return + } + + if toggle[1] == "on" { + runPprof() + } + if toggle[1] == "off" { + stopPprof() + } +} + func runPprof() { cmd := exec.Command("/usr/bin/pkill", "-USR2", "/opt/zededa/bin/zedbox") @@ -990,11 +1008,11 @@ func runPprof() { if err != nil { fmt.Printf("could not signal zedbox to run pprof") } +} - setAndStartProxyTCP("localhost:6543") - - _, err = http.Post("http://localhost:6543/stop", "", nil) - if err != nil { +func stopPprof() { + _, err := http.Post("http://localhost:6543/stop", "", nil) + if err != nil && !errors.Is(err, io.EOF) { fmt.Printf("could not stop pprof: %+v\n", err) } }