perf: opportunistically check all rings for data
Waking up user space from the kernel is expensive, so the perf reader
allows adjusting the rate at which wakeups happen. This saves CPU at the
cost of latency: some data remains in the buffer for longer.
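
For context, this trade-off is exposed through ReaderOptions when the
reader is created. A minimal sketch, assuming a PerfEventArray map named
events; the buffer size and watermark values are arbitrary illustrations:

package main

import (
	"os"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/perf"
)

// newBatchedReader opens a reader that wakes userspace only once a chunk
// of data has accumulated, trading latency for CPU.
func newBatchedReader(events *ebpf.Map) (*perf.Reader, error) {
	return perf.NewReaderWithOptions(events, 4*os.Getpagesize(), perf.ReaderOptions{
		// Don't wake the reader until at least a page of data is
		// buffered in a per-CPU ring, instead of waking per record.
		Watermark: os.Getpagesize(),
	})
}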

The reader is an abstraction over multiple ring buffers, one per CPU. It
only reads from a ring buffer if it has received a wakeup from the kernel
for that ring. This is wasteful: wakeups are expensive (context switching
and so on) while checking a ring for contents is cheap (just an atomic load).
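
To illustrate "just an atomic load": the kernel publishes its write
position in the first page of each mmap'd ring, so userspace can test a
ring for unread data without a syscall. A rough sketch using the Linux ABI
type from x/sys/unix; the wrapper struct is hypothetical, since the
package's real perfEventRing is unexported:

package main

import (
	"sync/atomic"

	"golang.org/x/sys/unix"
)

// ringView is a hypothetical stand-in for the package's internal
// perfEventRing: a view of one mmap'd per-CPU ring.
type ringView struct {
	meta *unix.PerfEventMmapPage // kernel-shared metadata page
	tail uint64                  // consumer position, owned by userspace
}

// hasData reports whether the kernel has written records that we have not
// consumed yet. It costs one atomic load; no context switch is needed.
func (r *ringView) hasData() bool {
	head := atomic.LoadUint64(&r.meta.Data_head)
	return head != r.tail
}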

Change the behaviour so that we read data from any ready ring buffer,
regardless of why we were woken up.

Signed-off-by: Lorenz Bauer <lmb@isovalent.com>
lmb committed Apr 10, 2024
1 parent b33a6c1 commit dce5fef
Showing 1 changed file with 9 additions and 14 deletions.

--- a/perf/reader.go
+++ b/perf/reader.go
@@ -30,10 +30,6 @@ type perfEventHeader struct {
 	Size uint16
 }
 
-func cpuForEvent(event *unix.EpollEvent) int {
-	return int(event.Pad)
-}
-
 // Record contains either a sample or a counter of the
 // number of lost samples.
 type Record struct {
@@ -150,7 +146,6 @@ type Reader struct {
 	// stored in it, so we keep a reference alive.
 	array       *ebpf.Map
 	rings       []*perfEventRing
-	epollEvents []unix.EpollEvent
 	epollRings  []*perfEventRing
 	eventHeader []byte
@@ -262,7 +257,6 @@ func NewReaderWithOptions(array *ebpf.Map, perCPUBuffer int, opts ReaderOptions)
 		rings:       rings,
 		poller:      poller,
 		deadline:    time.Time{},
-		epollEvents: make([]unix.EpollEvent, len(rings)),
 		epollRings:  make([]*perfEventRing, 0, len(rings)),
 		eventHeader: make([]byte, perfEventHeaderSize),
 		pauseFds:    pauseFds,
@@ -354,12 +348,14 @@ func (pr *Reader) ReadInto(rec *Record) error {
 		return fmt.Errorf("perf ringbuffer: %w", ErrClosed)
 	}
 
+	events := make([]unix.EpollEvent, 1)
+
 	for {
 		if len(pr.epollRings) == 0 {
 			// NB: The deferred pauseMu.Unlock will panic if Wait panics, which
 			// might obscure the original panic.
 			pr.pauseMu.Unlock()
-			nEvents, err := pr.poller.Wait(pr.epollEvents, pr.deadline)
+			_, err := pr.poller.Wait(events, pr.deadline)
 			pr.pauseMu.Lock()
 			if err != nil {
 				return err
@@ -370,14 +366,13 @@ func (pr *Reader) ReadInto(rec *Record) error {
 				return errMustBePaused
 			}
 
-			for _, event := range pr.epollEvents[:nEvents] {
-				ring := pr.rings[cpuForEvent(&event)]
-				pr.epollRings = append(pr.epollRings, ring)
-
-				// Read the current head pointer now, not every time
-				// we read a record. This prevents a single fast producer
-				// from keeping the reader busy.
+			// Waking up userspace is expensive, make the most of it by checking
+			// all rings.
+			for _, ring := range pr.rings {
 				ring.loadHead()
+				if ring.remaining() > 0 {
+					pr.epollRings = append(pr.epollRings, ring)
+				}
 			}
 		}
 

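None of this changes the consumer-facing API; a typical read loop still
looks like the sketch below (process is a hypothetical callback). The
difference after this commit is that a single kernel wakeup queues every
ring with pending data, so subsequent ReadInto calls drain them without
further Wait calls:

package main

import (
	"errors"

	"github.com/cilium/ebpf/perf"
)

// consume drains records until the reader is closed. process is a
// hypothetical callback supplied by the caller.
func consume(rd *perf.Reader, process func(sample []byte)) error {
	var rec perf.Record
	for {
		if err := rd.ReadInto(&rec); err != nil {
			if errors.Is(err, perf.ErrClosed) {
				return nil
			}
			return err
		}
		if rec.LostSamples > 0 {
			// The ring was full and the kernel dropped samples.
			continue
		}
		process(rec.RawSample)
	}
}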