diff --git a/internal/sys/fd.go b/internal/sys/fd.go index 941a56fb9..028be0045 100644 --- a/internal/sys/fd.go +++ b/internal/sys/fd.go @@ -7,6 +7,7 @@ import ( "runtime" "strconv" + "github.com/cilium/ebpf/internal/testutils/fdtrace" "github.com/cilium/ebpf/internal/unix" ) @@ -17,15 +18,7 @@ type FD struct { } func newFD(value int) *FD { - if onLeakFD != nil { - // Attempt to store the caller's stack for the given fd value. - // Panic if fds contains an existing stack for the fd. - old, exist := fds.LoadOrStore(value, callersFrames()) - if exist { - f := old.(*runtime.Frames) - panic(fmt.Sprintf("found existing stack for fd %d:\n%s", value, FormatFrames(f))) - } - } + fdtrace.TraceFD(value, 1) fd := &FD{value} runtime.SetFinalizer(fd, (*FD).finalize) @@ -39,13 +32,7 @@ func (fd *FD) finalize() { return } - // Invoke the fd leak callback. Calls LoadAndDelete to guarantee the callback - // is invoked at most once for one sys.FD allocation, runtime.Frames can only - // be unwound once. - f, ok := fds.LoadAndDelete(fd.Int()) - if ok && onLeakFD != nil { - onLeakFD(f.(*runtime.Frames)) - } + fdtrace.LeakFD(fd.raw) _ = fd.Close() } @@ -96,8 +83,8 @@ func (fd *FD) Close() error { } func (fd *FD) disown() int { - value := int(fd.raw) - fds.Delete(int(value)) + value := fd.raw + fdtrace.ForgetFD(value) fd.raw = -1 runtime.SetFinalizer(fd, nil) diff --git a/internal/sys/fd_test.go b/internal/sys/fd_test.go index 6aaa41211..e94f72a46 100644 --- a/internal/sys/fd_test.go +++ b/internal/sys/fd_test.go @@ -41,6 +41,7 @@ func TestFD(t *testing.T) { fd, err := NewFD(0) qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Not(qt.Equals(fd.Int(), 0)), qt.Commentf("fd value should not be zero")) + qt.Assert(t, qt.IsNil(fd.Close())) var stat unix.Stat_t err = unix.Fstat(0, &stat) diff --git a/internal/sys/fd_trace.go b/internal/sys/fd_trace.go deleted file mode 100644 index cd50dd1f6..000000000 --- a/internal/sys/fd_trace.go +++ /dev/null @@ -1,93 +0,0 @@ -package sys - -import ( - "bytes" - "fmt" - "runtime" - "sync" -) - -// OnLeakFD controls tracing [FD] lifetime to detect resources that are not -// closed by Close(). -// -// If fn is not nil, tracing is enabled for all FDs created going forward. fn is -// invoked for all FDs that are closed by the garbage collector instead of an -// explicit Close() by a caller. Calling OnLeakFD twice with a non-nil fn -// (without disabling tracing in the meantime) will cause a panic. -// -// If fn is nil, tracing will be disabled. Any FDs that have not been closed are -// considered to be leaked, fn will be invoked for them, and the process will be -// terminated. -// -// fn will be invoked at most once for every unique sys.FD allocation since a -// runtime.Frames can only be unwound once. -func OnLeakFD(fn func(*runtime.Frames)) { - // Enable leak tracing if new fn is provided. - if fn != nil { - if onLeakFD != nil { - panic("OnLeakFD called twice with non-nil fn") - } - - onLeakFD = fn - return - } - - // fn is nil past this point. - - if onLeakFD == nil { - return - } - - // Call onLeakFD for all open fds. - if fs := flushFrames(); len(fs) != 0 { - for _, f := range fs { - onLeakFD(f) - } - } - - onLeakFD = nil -} - -var onLeakFD func(*runtime.Frames) - -// fds is a registry of all file descriptors wrapped into sys.fds that were -// created while an fd tracer was active. -var fds sync.Map // map[int]*runtime.Frames - -// flushFrames removes all elements from fds and returns them as a slice. This -// deals with the fact that a runtime.Frames can only be unwound once using -// Next(). -func flushFrames() []*runtime.Frames { - var frames []*runtime.Frames - fds.Range(func(key, value any) bool { - frames = append(frames, value.(*runtime.Frames)) - fds.Delete(key) - return true - }) - return frames -} - -func callersFrames() *runtime.Frames { - c := make([]uintptr, 32) - - // Skip runtime.Callers and this function. - i := runtime.Callers(2, c) - if i == 0 { - return nil - } - - return runtime.CallersFrames(c) -} - -// FormatFrames formats a runtime.Frames as a human-readable string. -func FormatFrames(fs *runtime.Frames) string { - var b bytes.Buffer - for { - f, more := fs.Next() - b.WriteString(fmt.Sprintf("\t%s+%#x\n\t\t%s:%d\n", f.Function, f.PC-f.Entry, f.File, f.Line)) - if !more { - break - } - } - return b.String() -} diff --git a/internal/sys/syscall_test.go b/internal/sys/syscall_test.go index ed6f8029f..ac799c944 100644 --- a/internal/sys/syscall_test.go +++ b/internal/sys/syscall_test.go @@ -4,6 +4,7 @@ import ( "errors" "testing" + "github.com/cilium/ebpf/internal/testutils/fdtrace" "github.com/cilium/ebpf/internal/unix" "github.com/go-quicktest/qt" @@ -59,3 +60,7 @@ func TestSyscallError(t *testing.T) { t.Error("Error is the SyscallError") } } + +func TestMain(m *testing.M) { + fdtrace.TestMain(m) +} diff --git a/internal/testutils/fdtrace/fd.go b/internal/testutils/fdtrace/fd.go deleted file mode 100644 index d7d953480..000000000 --- a/internal/testutils/fdtrace/fd.go +++ /dev/null @@ -1,34 +0,0 @@ -package fdtrace - -import ( - "fmt" - "os" - "runtime" - "testing" - - "github.com/cilium/ebpf/internal/sys" -) - -// TestMain runs m with sys.FD leak tracing enabled. -func TestMain(m *testing.M) { - // fn can either be invoked asynchronously by the gc or during disabling of - // the leak tracer below. Don't terminate the program immediately, instead - // capture a boolean that will be used to set the exit code. This avoids races - // and gives all events the chance to be written to stderr. - var leak bool - sys.OnLeakFD(func(fs *runtime.Frames) { - fmt.Fprintln(os.Stderr, "leaked fd created at:") - fmt.Fprintln(os.Stderr, sys.FormatFrames(fs)) - leak = true - }) - - ret := m.Run() - - sys.OnLeakFD(nil) - - if leak { - ret = 99 - } - - os.Exit(ret) -} diff --git a/internal/testutils/fdtrace/fd_trace.go b/internal/testutils/fdtrace/fd_trace.go new file mode 100644 index 000000000..562df2cc0 --- /dev/null +++ b/internal/testutils/fdtrace/fd_trace.go @@ -0,0 +1,103 @@ +package fdtrace + +import ( + "bytes" + "fmt" + "os" + "runtime" + "sync" + "sync/atomic" +) + +// foundLeak is atomic since the GC may collect objects in parallel. +var foundLeak atomic.Bool + +func onLeakFD(fs *runtime.Frames) { + foundLeak.Store(true) + fmt.Fprintln(os.Stderr, "leaked fd created at:") + fmt.Fprintln(os.Stderr, formatFrames(fs)) +} + +// fds is a registry of all file descriptors wrapped into sys.fds that were +// created while an fd tracer was active. +var fds *sync.Map // map[int]*runtime.Frames + +// TraceFD associates raw with the current execution stack. +// +// skip controls how many entries of the stack the function should skip. +func TraceFD(raw int, skip int) { + if fds == nil { + return + } + + // Attempt to store the caller's stack for the given fd value. + // Panic if fds contains an existing stack for the fd. + old, exist := fds.LoadOrStore(raw, callersFrames(skip)) + if exist { + f := old.(*runtime.Frames) + panic(fmt.Sprintf("found existing stack for fd %d:\n%s", raw, formatFrames(f))) + } +} + +// ForgetFD removes any existing association for raw. +func ForgetFD(raw int) { + if fds != nil { + fds.Delete(raw) + } +} + +// LeakFD indicates that raw was leaked. +// +// Calling the function with a value that was not passed to [TraceFD] before +// is undefined. +func LeakFD(raw int) { + if fds == nil { + return + } + + // Invoke the fd leak callback. Calls LoadAndDelete to guarantee the callback + // is invoked at most once for one sys.FD allocation, runtime.Frames can only + // be unwound once. + f, ok := fds.LoadAndDelete(raw) + if ok { + onLeakFD(f.(*runtime.Frames)) + } +} + +// flushFrames removes all elements from fds and returns them as a slice. This +// deals with the fact that a runtime.Frames can only be unwound once using +// Next(). +func flushFrames() []*runtime.Frames { + var frames []*runtime.Frames + fds.Range(func(key, value any) bool { + frames = append(frames, value.(*runtime.Frames)) + fds.Delete(key) + return true + }) + return frames +} + +func callersFrames(skip int) *runtime.Frames { + c := make([]uintptr, 32) + + // Skip runtime.Callers and this function. + i := runtime.Callers(skip+2, c) + if i == 0 { + return nil + } + + return runtime.CallersFrames(c) +} + +// formatFrames formats a runtime.Frames as a human-readable string. +func formatFrames(fs *runtime.Frames) string { + var b bytes.Buffer + for { + f, more := fs.Next() + b.WriteString(fmt.Sprintf("\t%s+%#x\n\t\t%s:%d\n", f.Function, f.PC-f.Entry, f.File, f.Line)) + if !more { + break + } + } + return b.String() +} diff --git a/internal/testutils/fdtrace/main.go b/internal/testutils/fdtrace/main.go new file mode 100644 index 000000000..c1f7b42d9 --- /dev/null +++ b/internal/testutils/fdtrace/main.go @@ -0,0 +1,31 @@ +package fdtrace + +import ( + "os" + "sync" +) + +type testingM interface { + Run() int +} + +// TestMain runs m with fd tracing enabled. +// +// The function calls [os.Exit] and does not return. +func TestMain(m testingM) { + fds = new(sync.Map) + + ret := m.Run() + + if fs := flushFrames(); len(fs) != 0 { + for _, f := range fs { + onLeakFD(f) + } + } + + if foundLeak.Load() { + ret = 99 + } + + os.Exit(ret) +}