From 629483784baa95e91c992134241439525cad0dac Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 26 May 2022 16:29:16 +0000 Subject: [PATCH] program: remove references from ProgramSpec Now that we don't need ProgramSpec.layout anymore to reconstruct CO-RE relocations we can remove ProgramSpec.references. At the same time we can simplify the linking logic. --- asm/instruction.go | 18 ++++-- elf_reader.go | 10 ++-- linker.go | 134 ++++++++++++++++++++++++--------------------- linker_test.go | 4 +- prog.go | 42 +------------- prog_test.go | 35 ------------ 6 files changed, 94 insertions(+), 149 deletions(-) diff --git a/asm/instruction.go b/asm/instruction.go index 5bf9920de..f17d88b51 100644 --- a/asm/instruction.go +++ b/asm/instruction.go @@ -8,6 +8,7 @@ import ( "fmt" "io" "math" + "sort" "strings" "github.com/cilium/ebpf/internal/sys" @@ -568,9 +569,8 @@ func (insns Instructions) SymbolOffsets() (map[string]int, error) { // FunctionReferences returns a set of symbol names these Instructions make // bpf-to-bpf calls to. -func (insns Instructions) FunctionReferences() map[string]bool { - calls := make(map[string]bool) - +func (insns Instructions) FunctionReferences() []string { + calls := make(map[string]struct{}) for _, ins := range insns { if ins.Constant != -1 { // BPF-to-BPF calls have -1 constants. @@ -585,10 +585,16 @@ func (insns Instructions) FunctionReferences() map[string]bool { continue } - calls[ins.Reference()] = true + calls[ins.Reference()] = struct{}{} + } + + result := make([]string, 0, len(calls)) + for call := range calls { + result = append(result, call) } - return calls + sort.Strings(result) + return result } // ReferenceOffsets returns the set of references and their offset in @@ -667,6 +673,8 @@ func (insns Instructions) Format(f fmt.State, c rune) { // Marshal encodes a BPF program into the kernel format. // +// insns may be modified if there are unresolved jumps or bpf2bpf calls. +// // Returns ErrUnsatisfiedProgramReference if there is a Reference Instruction // without a matching Symbol Instruction within insns. func (insns Instructions) Marshal(w io.Writer, bo binary.ByteOrder) error { diff --git a/elf_reader.go b/elf_reader.go index 3337ba84b..df278895c 100644 --- a/elf_reader.go +++ b/elf_reader.go @@ -283,6 +283,7 @@ func (ec *elfCode) loadProgramSections() (map[string]*ProgramSpec, error) { progs := make(map[string]*ProgramSpec) // Generate a ProgramSpec for each function found in each program section. + var export []string for _, sec := range ec.sections { if sec.kind != programSection { continue @@ -319,13 +320,14 @@ func (ec *elfCode) loadProgramSections() (map[string]*ProgramSpec, error) { return nil, fmt.Errorf("duplicate program name %s", name) } progs[name] = spec + + if spec.SectionName != ".text" { + export = append(export, name) + } } } - // Populate each prog's references with pointers to all of its callees. - if err := populateReferences(progs); err != nil { - return nil, fmt.Errorf("populating references: %w", err) - } + flattenPrograms(progs, export) // Hide programs (e.g. library functions) that were not explicitly emitted // to an ELF section. These could be exposed in a separate CollectionSpec diff --git a/linker.go b/linker.go index 97f7c510d..e6276b182 100644 --- a/linker.go +++ b/linker.go @@ -52,68 +52,9 @@ func splitSymbols(insns asm.Instructions) (map[string]asm.Instructions, error) { // Each function is denoted by an ELF symbol and the compiler takes care of // register setup before each jump instruction. -// populateReferences populates all of progs' Instructions and references -// with their full dependency chains including transient dependencies. -func populateReferences(progs map[string]*ProgramSpec) error { - type props struct { - insns asm.Instructions - refs map[string]*ProgramSpec - } - - out := make(map[string]props) - - // Resolve and store direct references between all progs. - if err := findReferences(progs); err != nil { - return fmt.Errorf("finding references: %w", err) - } - - // Flatten all progs' instruction streams. - for name, prog := range progs { - insns, refs := prog.flatten(nil) - - prop := props{ - insns: insns, - refs: refs, - } - - out[name] = prop - } - - // Replace all progs' instructions and references - for name, props := range out { - progs[name].Instructions = props.insns - progs[name].references = props.refs - } - - return nil -} - -// findReferences finds bpf-to-bpf calls between progs and populates each -// prog's references field with its direct neighbours. -func findReferences(progs map[string]*ProgramSpec) error { - // Check all ProgramSpecs in the collection against each other. - for _, prog := range progs { - prog.references = make(map[string]*ProgramSpec) - - // Look up call targets in progs and store pointers to their corresponding - // ProgramSpecs as direct references. - for refname := range prog.Instructions.FunctionReferences() { - ref := progs[refname] - // Call targets are allowed to be missing from an ELF. This occurs when - // a program calls into a forward function declaration that is left - // unimplemented. This is caught at load time during fixups. - if ref != nil { - prog.references[refname] = ref - } - } - } - - return nil -} - -// hasReferences returns true if insns contains one or more bpf2bpf +// hasFunctionReferences returns true if insns contains one or more bpf2bpf // function references. -func hasReferences(insns asm.Instructions) bool { +func hasFunctionReferences(insns asm.Instructions) bool { for _, i := range insns { if i.IsFunctionReference() { return true @@ -160,6 +101,77 @@ func applyRelocations(insns asm.Instructions, local, target *btf.Spec) error { return nil } +// flattenPrograms resolves bpf-to-bpf calls for a set of programs. +// +// Links all programs in names by modifying their ProgramSpec in progs. +func flattenPrograms(progs map[string]*ProgramSpec, names []string) { + // Pre-calculate all function references. + refs := make(map[*ProgramSpec][]string) + for _, prog := range progs { + refs[prog] = prog.Instructions.FunctionReferences() + } + + // Create a flattened instruction stream, but don't modify progs yet to + // avoid linking multiple times. + flattened := make([]asm.Instructions, 0, len(names)) + for _, name := range names { + flattened = append(flattened, flattenInstructions(name, progs, refs)) + } + + // Finally, assign the flattened instructions. + for i, name := range names { + progs[name].Instructions = flattened[i] + } +} + +// flattenInstructions resolves bpf-to-bpf calls for a single program. +// +// Flattens the instructions of prog by concatenating the instructions of all +// direct and indirect dependencies. +// +// progs contains all referenceable programs, while refs contain the direct +// dependencies of each program. +func flattenInstructions(name string, progs map[string]*ProgramSpec, refs map[*ProgramSpec][]string) asm.Instructions { + prog := progs[name] + + insns := make(asm.Instructions, len(prog.Instructions)) + copy(insns, prog.Instructions) + + // Add all direct references of prog to the list of to be linked programs. + pending := make([]string, len(refs[prog])) + copy(pending, refs[prog]) + + // All references for which we've appended instructions. + linked := make(map[string]bool) + + // Iterate all pending references. We can't use a range since pending is + // modified in the body below. + for len(pending) > 0 { + var ref string + ref, pending = pending[0], pending[1:] + + if linked[ref] { + // We've already linked this ref, don't append instructions again. + continue + } + + progRef := progs[ref] + if progRef == nil { + // We don't have instructions that go with this reference. This + // happens when calling extern functions. + continue + } + + insns = append(insns, progRef.Instructions...) + linked[ref] = true + + // Make sure we link indirect references. + pending = append(pending, refs[progRef]...) + } + + return insns +} + // fixupAndValidate is called by the ELF reader right before marshaling the // instruction stream. It performs last-minute adjustments to the program and // runs some sanity checks before sending it off to the kernel. diff --git a/linker_test.go b/linker_test.go index 51bdf8d5e..b8f4e90ba 100644 --- a/linker_test.go +++ b/linker_test.go @@ -38,9 +38,7 @@ func TestFindReferences(t *testing.T) { }, } - if err := populateReferences(progs); err != nil { - t.Fatal(err) - } + flattenPrograms(progs, []string{"entrypoint"}) prog, err := NewProgram(progs["entrypoint"]) testutils.SkipIfNotSupported(t, err) diff --git a/prog.go b/prog.go index dc2bbd80e..d203dc638 100644 --- a/prog.go +++ b/prog.go @@ -102,9 +102,6 @@ type ProgramSpec struct { // The byte order this program was compiled for, may be nil. ByteOrder binary.ByteOrder - - // Programs called by this ProgramSpec. Includes all dependencies. - references map[string]*ProgramSpec } // Copy returns a copy of the spec. @@ -126,43 +123,6 @@ func (ps *ProgramSpec) Tag() (string, error) { return ps.Instructions.Tag(internal.NativeEndian) } -// flatten returns spec's full instruction stream including all of its -// dependencies and an expanded map of references that includes all symbols -// appearing in the instruction stream. -// -// Returns nil, nil if spec was already visited. -func (spec *ProgramSpec) flatten(visited map[*ProgramSpec]bool) (asm.Instructions, map[string]*ProgramSpec) { - if visited == nil { - visited = make(map[*ProgramSpec]bool) - } - - // This program and its dependencies were already collected. - if visited[spec] { - return nil, nil - } - - visited[spec] = true - - // Start off with spec's direct references and instructions. - progs := spec.references - insns := spec.Instructions - - // Recurse into each reference and append/merge its references into - // a temporary buffer as to not interfere with the resolution process. - for _, ref := range spec.references { - if ri, rp := ref.flatten(visited); ri != nil || rp != nil { - insns = append(insns, ri...) - - // Merge nested references into the top-level scope. - for n, p := range rp { - progs[n] = p - } - } - } - - return insns, progs -} - // Program represents BPF program loaded into the kernel. // // It is not safe to close a Program which is used by other goroutines. @@ -343,7 +303,7 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand } } - if (errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM)) && hasReferences(spec.Instructions) { + if (errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM)) && hasFunctionReferences(spec.Instructions) { if err := haveBPFToBPFCalls(); err != nil { return nil, fmt.Errorf("load program: %w", internal.ErrorWithLog(err, logBuf, logErr)) } diff --git a/prog_test.go b/prog_test.go index e612c2a6d..6d159149d 100644 --- a/prog_test.go +++ b/prog_test.go @@ -15,7 +15,6 @@ import ( "time" qt "github.com/frankban/quicktest" - "github.com/google/go-cmp/cmp" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/btf" @@ -137,40 +136,6 @@ func TestProgramRunWithOptions(t *testing.T) { } } -func TestProgramSpecFlattenOrder(t *testing.T) { - prog_a := ProgramSpec{Name: "prog_a"} - prog_b := ProgramSpec{Name: "prog_b"} - prog_c := ProgramSpec{ - Name: "prog_c", - references: map[string]*ProgramSpec{ - "prog_a": &prog_a, - "prog_b": &prog_b, - }, - } - - spec := ProgramSpec{ - references: map[string]*ProgramSpec{ - // Depend on prog_a since it's a mutual dependency of prog_c. - "prog_a": &prog_a, - // Omit prog_b to ensure indirect dependencies get pulled in. - "prog_c": &prog_c, - }, - } - - // Run the flatten operation twice to make sure both yield the same output. - ins1, refs1 := spec.flatten(nil) - ins2, refs2 := spec.flatten(nil) - - opts := cmp.AllowUnexported(spec) - if diff := cmp.Diff(ins1, ins2, opts); diff != "" { - t.Fatal(diff) - } - - if diff := cmp.Diff(refs1, refs2, opts); diff != "" { - t.Fatal(diff) - } -} - func TestProgramBenchmark(t *testing.T) { prog := mustSocketFilter(t)