Skip to content

Commit

Permalink
program: block SIGPROF during BPF_PROG_RUN
Browse files Browse the repository at this point in the history
Extend the profiler blocking we introduced for BPF_PROG_LOAD to
BPF_PROG_RUN, with the difference that EINTR is not handled in the
syscall wrapper itself. This allows us to have PROG_RUN specific
retry logic.

Deprecate RunOptions.Reset since its main motivation was to
account for profiler skew. Interruptions from the runtime are only
possible via runtime.AllThreadSyscall, which is very specialised
anyways. This doesn't protect from interruptions due to signals
that are being sent from "outside" the process, which will still
skew results and cause retries.

Document that RunOptions.Repeat is a lower bound for how often
a program is run due to the retry behaviour. Also add a
workaround that makes the common case of repeat == 1 not suffer
from interruptions.

Fixes cilium#853
  • Loading branch information
lmb committed Dec 13, 2022
1 parent 5c1776b commit 707c963
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 12 deletions.
2 changes: 1 addition & 1 deletion internal/sys/syscall.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ var ENOTSUPP = syscall.Errno(524)
func BPF(cmd Cmd, attr unsafe.Pointer, size uintptr) (uintptr, error) {
// Prevent the Go profiler from repeatedly interrupting the verifier,
// which could otherwise lead to a livelock due to receiving EAGAIN.
if cmd == BPF_PROG_LOAD {
if cmd == BPF_PROG_LOAD || cmd == BPF_PROG_RUN {
maskProfilerSignal()
defer unmaskProfilerSignal()
}
Expand Down
49 changes: 39 additions & 10 deletions prog.go
Original file line number Diff line number Diff line change
Expand Up @@ -503,14 +503,20 @@ func (p *Program) Close() error {
// Various options for Run'ing a Program
type RunOptions struct {
// Program's data input. Required field.
//
// The kernel expects at least 14 bytes input for an ethernet header for
// XDP and SKB programs.
Data []byte
// Program's data after Program has run. Caller must allocate. Optional field.
DataOut []byte
// Program's context input. Optional field.
Context interface{}
// Program's context after Program has run. Must be a pointer or slice. Optional field.
ContextOut interface{}
// Number of times to run Program. Optional field. Defaults to 1.
// Minimum number of times to run Program. Optional field. Defaults to 1.
//
// The program may be executed more often than this due to interruptions, e.g.
// when runtime.AllThreadsSyscall is invoked.
Repeat uint32
// Optional flags.
Flags uint32
Expand All @@ -519,6 +525,8 @@ type RunOptions struct {
CPU uint32
// Called whenever the syscall is interrupted, and should be set to testing.B.ResetTimer
// or similar. Typically used during benchmarking. Optional field.
//
// Deprecated: use [testing.B.ReportMetric] with unit "ns/op" instead.
Reset func()
}

Expand Down Expand Up @@ -548,7 +556,7 @@ func (p *Program) Test(in []byte) (uint32, []byte, error) {

ret, _, err := p.testRun(&opts)
if err != nil {
return ret, nil, fmt.Errorf("can't test program: %w", err)
return ret, nil, fmt.Errorf("test program: %w", err)
}
return ret, opts.DataOut, nil
}
Expand All @@ -559,7 +567,7 @@ func (p *Program) Test(in []byte) (uint32, []byte, error) {
func (p *Program) Run(opts *RunOptions) (uint32, error) {
ret, _, err := p.testRun(opts)
if err != nil {
return ret, fmt.Errorf("can't test program: %w", err)
return ret, fmt.Errorf("run program: %w", err)
}
return ret, nil
}
Expand Down Expand Up @@ -588,12 +596,12 @@ func (p *Program) Benchmark(in []byte, repeat int, reset func()) (uint32, time.D

ret, total, err := p.testRun(&opts)
if err != nil {
return ret, total, fmt.Errorf("can't benchmark program: %w", err)
return ret, total, fmt.Errorf("benchmark program: %w", err)
}
return ret, total, nil
}

var haveProgTestRun = internal.NewFeatureTest("BPF_PROG_TEST_RUN", "4.12", func() error {
var haveProgRun = internal.NewFeatureTest("BPF_PROG_RUN", "4.12", func() error {
prog, err := NewProgram(&ProgramSpec{
// SocketFilter does not require privileges on newer kernels.
Type: SocketFilter,
Expand Down Expand Up @@ -642,7 +650,7 @@ func (p *Program) testRun(opts *RunOptions) (uint32, time.Duration, error) {
return 0, 0, fmt.Errorf("input is too long")
}

if err := haveProgTestRun(); err != nil {
if err := haveProgRun(); err != nil {
return 0, 0, err
}

Expand Down Expand Up @@ -675,24 +683,45 @@ func (p *Program) testRun(opts *RunOptions) (uint32, time.Duration, error) {
Cpu: opts.CPU,
}

if attr.Repeat == 0 {
attr.Repeat = 1
}

retry:
for {
err := sys.ProgRun(&attr)
if err == nil {
break
break retry
}

if errors.Is(err, unix.EINTR) {
if attr.Repeat == 1 {
// Older kernels check whether enough repetitions have been
// executed only after checking for pending signals.
//
// run signal? done? run ...
//
// As a result we can get EINTR for repeat==1 even though
// the program was run exactly once. Treat this as a
// successful run instead.
//
// Since commit 607b9cc92bd7 ("bpf: Consolidate shared test timing code")
// the conditions are reversed:
// run done? signal? ...
break retry
}

if opts.Reset != nil {
opts.Reset()
}
continue
continue retry
}

if errors.Is(err, sys.ENOTSUPP) {
return 0, 0, fmt.Errorf("kernel doesn't support testing program type %s: %w", p.Type(), ErrNotSupported)
return 0, 0, fmt.Errorf("kernel doesn't support running %s: %w", p.Type(), ErrNotSupported)
}

return 0, 0, fmt.Errorf("can't run test: %w", err)
return 0, 0, err
}

if opts.DataOut != nil {
Expand Down
2 changes: 1 addition & 1 deletion prog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ func TestProgramFromFD(t *testing.T) {
}

func TestHaveProgTestRun(t *testing.T) {
testutils.CheckFeatureTest(t, haveProgTestRun)
testutils.CheckFeatureTest(t, haveProgRun)
}

func TestProgramGetNextID(t *testing.T) {
Expand Down

0 comments on commit 707c963

Please sign in to comment.