Skip to content

Commit

Permalink
agentlog: add pprof debug http handler
Browse files Browse the repository at this point in the history
pprof show various insights into the process and can be
accessed via http://localhost:6543/

to start it, send USR2 to zedbox or run `eve http-debug`;
to stop it, do curl localhost:6543/stop -X POST
or `eve http-debug stop`

Signed-off-by: Christoph Ostarek <christoph@zededa.com>
  • Loading branch information
christoph-zededa committed May 24, 2023
1 parent d73aeee commit 7740c64
Show file tree
Hide file tree
Showing 4 changed files with 203 additions and 44 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ QEMU_OPTS_COMMON= -m $(QEMU_MEMORY) -smp 4 -display none $(QEMU_OPTS_BIOS) \
-serial mon:stdio \
-global ICH9-LPC.noreboot=false -watchdog-action reset \
-rtc base=utc,clock=rt \
-netdev user,id=eth0,net=$(QEMU_OPTS_NET1),dhcpstart=$(QEMU_OPTS_NET1_FIRST_IP),hostfwd=tcp::$(SSH_PORT)-:22$(QEMU_TFTP_OPTS) -device virtio-net-pci,netdev=eth0,romfile="" \
-netdev user,id=eth0,net=$(QEMU_OPTS_NET1),dhcpstart=$(QEMU_OPTS_NET1_FIRST_IP),hostfwd=tcp::$(SSH_PORT)-:22$(QEMU_TFTP_OPTS) -device virtio-net-pci,netdev=eth0,romfile="" \
-netdev user,id=eth1,net=$(QEMU_OPTS_NET2),dhcpstart=$(QEMU_OPTS_NET2_FIRST_IP) -device virtio-net-pci,netdev=eth1,romfile="" \
-device nec-usb-xhci,id=xhci \
-qmp unix:$(CURDIR)/qmp.sock,server,wait=off
Expand Down
5 changes: 5 additions & 0 deletions docs/DEBUGGING.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ cp -r /containers/services/pillar/lower /persist/services/pillar
# reboot and enjoy updates to the pillar container
```

## pprof access

pprof can be enabled with `eve http-debug`; now zedbox listens on port 6543 which is forwarded to be reachable from outside.
Information on how to use pprof can be found here: <https://pkg.go.dev/net/http/pprof>

## Keyboard/console access

For security reasons the USB ports are disabled by default. The only exception is during hardware onboarding when an override file might be needed from a USB stick to do the initial network configuration as specified in in [DEVICE-CONNECTIVITY](DEVICE-CONNECTIVITY.md). During that onboarding USB keyboard access is currently also allowed. However, the USB access does not start until the pillar container is started.
Expand Down
58 changes: 58 additions & 0 deletions pkg/dom0-ztools/rootfs/bin/eve
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ Welcome to EVE!
persist attach <disk>
config mount <mountpoint>
config unmount
http-debug
dump-stacks
dump-memory
ssh
firewall drop
verbose on|off
version
Expand Down Expand Up @@ -82,6 +86,33 @@ unmount_partlabel() {
fi
}

http_debug_request() {
URL="$1"
running=0

if nc -z 127.1 6543; then
running=1
fi

if [ "$running" = "0" ]; then
pkill -USR2 /opt/zededa/bin/zedbox
fi

wget -O - "$URL" --post-data ''

if [ "$running" = "0" ]; then
wget -O - 127.1:6543/stop --post-data '' > /dev/null 2>&1
fi
}

dump_stacks() {
http_debug_request "127.1:6543/dump/stacks"
}

dump_mem() {
http_debug_request "127.1:6543/dump/memory"
}

case "$1" in
exec) # shellcheck disable=SC2086
ID=$(${CTR_CMD} t ls | awk '/^'${2:-pillar}' /{print $2;}' 2>/dev/null)
Expand Down Expand Up @@ -135,6 +166,33 @@ __EOT__
help
fi
;;
ssh)
printf "Enter public key: "
read -r a
echo "$a" >> /run/authorized_keys
echo "Now run 'eve firewall drop' to allow access to ssh"
;;
http-debug) if [ -z "$2" ] || [ "$2" = "start" ]; then
pkill -USR2 /opt/zededa/bin/zedbox
echo "Listening on :6543 -- use 'eve http-debug stop' to stop"
echo "Use 'eve ssh' to configure ssh key"
echo "Use 'eve firewall drop' to allow access to ssh from remote"
echo "Use ssh -L6543:localhost:6543 root@... to forward the http port"
echo "Visit http://localhost:6543 with your browser"
elif [ "$2" = "stop" ]; then
wget -O - 127.1:6543/stop --post-data '' > /dev/null 2>&1
else
echo "Unknown command; use 'start' or 'stop'"
fi
;;
dump-stacks)
dump_stacks
echo "Your information can be found in logread"
;;
dump-memory)
dump_mem
echo "Your information can be found in logread"
;;
verbose) # first lets find our piping process
for PIPE in $(pgrep cat); do
[ "$(readlink /proc/"$PIPE"/fd/0)" = /run/diag.pipe ] && break
Expand Down
182 changes: 139 additions & 43 deletions pkg/pillar/agentlog/agentlog.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,24 @@ package agentlog

import (
"fmt"
"io"
"net/http"
"net/http/pprof"
"os"
"os/signal"
"runtime"
dbg "runtime/debug"
"sort"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"

"github.com/lf-edge/eve/pkg/pillar/base"
"github.com/lf-edge/eve/pkg/pillar/types"
fileutils "github.com/lf-edge/eve/pkg/pillar/utils/file"
"github.com/satori/go.uuid"
uuid "github.com/satori/go.uuid"
"github.com/sirupsen/logrus"
)

Expand Down Expand Up @@ -149,58 +153,150 @@ func (hook *SkipCallerHook) Levels() []logrus.Level {

// Wait on channel then handle the signals
func handleSignals(log *base.LogObject, agentName string, agentPid int, sigs chan os.Signal) {
agentDebugDir := fmt.Sprintf("%s/%s/", types.PersistDebugDir, agentName)
sigUsr1FileName := agentDebugDir + "/sigusr1"
sigUsr2FileName := agentDebugDir + "/sigusr2"

for {
select {
case sig := <-sigs:
log.Functionf("handleSignals: received %v\n", sig)
switch sig {
case syscall.SIGUSR1:
stacks := getStacks(true)
stackArray := strings.Split(stacks, "\n\n")

sigUsr1File, err := os.OpenFile(sigUsr1FileName,
os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_TRUNC, 0755)
if err == nil {
for _, stack := range stackArray {
// This goes to /persist/agentdebug/<agentname>/sigusr1 file
sigUsr1File.WriteString(stack + "\n\n")
}
sigUsr1File.Close()
} else {
log.Errorf("handleSignals: Error opening file %s with: %s", sigUsr1FileName, err)
}

usr1LogObject := base.EnsureLogObject(log, base.SigUSR1StacksType,
"", uuid.UUID{}, string(base.SigUSR1StacksType))
if usr1LogObject != nil {
log.Warnf("SIGUSR1 triggered with %d stacks", len(stackArray))
for _, stack := range stackArray {
usr1LogObject.Warnf("%v", stack)
}
log.Warnf("SIGUSR1: end of stacks")
}
dumpStacks(log, agentName)
case syscall.SIGUSR2:
log.Warnf("SIGUSR2 triggered memory info:\n")
sigUsr2File, err := os.OpenFile(sigUsr2FileName,
os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_TRUNC, 0755)
if err != nil {
log.Errorf("handleSignals: Error opening file %s with: %s", sigUsr2FileName, err)
} else {
// This goes to /persist/agentdebug/<agentname>/sigusr2 file
sigUsr2File.WriteString("SIGUSR2 triggered memory info:\n")
}
go listenDebug(log, agentName)
}
}
}
}

logMemUsage(log, sigUsr2File)
logMemAllocationSites(log, sigUsr2File)
if sigUsr2File != nil {
sigUsr2File.Close()
}
func dumpMemoryInfo(log *base.LogObject, fileName string) {
log.Warnf("SIGUSR2 triggered memory info:\n")
sigUsr2File, err := os.OpenFile(fileName,
os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_TRUNC, 0755)
if err != nil {
log.Errorf("handleSignals: Error opening file %s with: %s", fileName, err)
} else {
// This goes to /persist/agentdebug/<agentname>/sigusr2 file
_, err := sigUsr2File.WriteString("SIGUSR2 triggered memory info:\n")
if err != nil {
log.Errorf("could not write to %s: %+v", fileName, err)
}
}

logMemUsage(log, sigUsr2File)
logMemAllocationSites(log, sigUsr2File)
if sigUsr2File != nil {
sigUsr2File.Close()
}
}

func dumpStacks(log *base.LogObject, fileName string) {
stacks := getStacks(true)
stackArray := strings.Split(stacks, "\n\n")

sigUsr1File, err := os.OpenFile(fileName,
os.O_WRONLY|os.O_CREATE|os.O_SYNC|os.O_TRUNC, 0755)
if err == nil {
for _, stack := range stackArray {
// This goes to /persist/agentdebug/<agentname>/sigusr1 file
_, err := sigUsr1File.WriteString(stack + "\n\n")
if err != nil {
log.Errorf("could not write to %s: %+v", fileName, err)
}
}
sigUsr1File.Close()
} else {
log.Errorf("handleSignals: Error opening file %s with: %s", fileName, err)
}

usr1LogObject := base.EnsureLogObject(log, base.SigUSR1StacksType,
"", uuid.UUID{}, string(base.SigUSR1StacksType))
if usr1LogObject != nil {
log.Warnf("SIGUSR1 triggered with %d stacks", len(stackArray))
for _, stack := range stackArray {
usr1LogObject.Warnf("%v", stack)
}
log.Warnf("SIGUSR1: end of stacks")
}
}

func writeOrLog(log *base.LogObject, w io.Writer, msg string) {
if _, err := w.Write([]byte(msg)); err != nil {
log.Errorf("Could not write to %+v: %+v", w, err)
}
}

var listenDebugRunning atomic.Bool

func listenDebug(log *base.LogObject, agentName string) {
if listenDebugRunning.Swap(true) {
return
}

agentDebugDir := fmt.Sprintf("%s/%s/", types.PersistDebugDir, agentName)
stacksDumpFileName := agentDebugDir + "/sigusr1"
memDumpFileName := agentDebugDir + "/sigusr2"

mux := http.NewServeMux()

server := &http.Server{
Addr: "localhost:6543",
Handler: mux,
ReadHeaderTimeout: 10 * time.Second,
}

info := `
This server exposes the net/http/pprof API.</br>
For examples on how to use it, see: <a href="https://pkg.go.dev/net/http/pprof">https://pkg.go.dev/net/http/pprof</a></br>
<a href="debug/pprof/">pprof methods</a></br></br>
To create a flamegraph, do: go tool pprof -raw -output=cpu.txt 'http://localhost:6543/debug/pprof/profile?seconds=5';</br>
stackcollapse-go.pl cpu.txt | flamegraph.pl --width 4096 > flame.svg</br>
(both scripts can be found <a href="https://github.com/brendangregg/FlameGraph">here</a>)
`

mux.Handle("/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html")
writeOrLog(log, w, info)
}))
mux.Handle("/index.html", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html")
writeOrLog(log, w, info)
}))

mux.Handle("/debug/pprof/", http.HandlerFunc(pprof.Index))
mux.Handle("/debug/pprof/cmdline", http.HandlerFunc(pprof.Cmdline))
mux.Handle("/debug/pprof/profile", http.HandlerFunc(pprof.Profile))
mux.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol))
mux.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace))
mux.Handle("/stop", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == http.MethodPost {
server.Close()
} else {
http.Error(w, "Did you want to use POST method?", http.StatusMethodNotAllowed)
return
}
}))
mux.Handle("/dump/stacks", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == http.MethodPost {
dumpStacks(log, stacksDumpFileName)
response := fmt.Sprintf("Stacks can be found in logread or %s\n", stacksDumpFileName)
writeOrLog(log, w, response)
} else {
http.Error(w, "Did you want to use POST method?", http.StatusMethodNotAllowed)
return
}
}))
mux.Handle("/dump/memory", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == http.MethodPost {
dumpMemoryInfo(log, memDumpFileName)
response := fmt.Sprintf("Stacks can be found in logread or %s\n", memDumpFileName)
writeOrLog(log, w, response)
} else {
http.Error(w, "Did you want to use POST method?", http.StatusMethodNotAllowed)
return
}
}))

if err := server.ListenAndServe(); err != nil {
log.Errorf("Listening failed: %+v", err)
}
}

Expand Down

0 comments on commit 7740c64

Please sign in to comment.