Skip to content

Commit

Permalink
Simplify the Ruby stack walker
Browse files Browse the repository at this point in the history
The overly complicated stack walking in two steps was implemented to try
to pack as much work as possible per loop iteration to prevent the BPF
verifier from rejecting our program due to the high complexity while
analysing all the potential code paths, but I don't think this is needed
anymore, so removing it to make the code vastly simpler!

Signed-off-by: Francisco Javier Honduvilla Coto <javierhonduco@gmail.com>
  • Loading branch information
javierhonduco committed Oct 16, 2022
1 parent 3e819cc commit 6f4f78c
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 69 deletions.
62 changes: 6 additions & 56 deletions src/bpf/rbperf.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,6 @@ struct {
__type(value, RubyVersionOffsets);
} version_specific_offsets SEC(".maps");

struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, 1);
__type(key, u32);
__type(value, RubyStackAddresses);
} scratch_stack SEC(".maps");

struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, 1);
Expand Down Expand Up @@ -199,7 +192,7 @@ read_frame(u64 pc, u64 body, RubyFrame *current_frame,
}

SEC("perf_event")
int read_ruby_stack(struct bpf_perf_event_data *ctx) {
int walk_ruby_stack(struct bpf_perf_event_data *ctx) {
u64 iseq_addr;
u64 pc;
u64 pc_addr;
Expand All @@ -221,21 +214,12 @@ int read_ruby_stack(struct bpf_perf_event_data *ctx) {
state->ruby_stack_program_count += 1;
u64 control_frame_t_sizeof = version_offsets->control_frame_t_sizeof;

RubyStackAddresses *ruby_stack_addresses = bpf_map_lookup_elem(&scratch_stack, &zero);
if (ruby_stack_addresses == NULL) {
return 0; // this should never happen
}

int rb_frame_count = 0;

#pragma unroll
for (int i = 0; i < MAX_STACKS_PER_PROGRAM; i++) {
rbperf_read(&iseq_addr, 8, (void *)(cfp + iseq_offset));
rbperf_read(&pc_addr, 8, (void *)(cfp + 0));
rbperf_read(&pc, 8, (void *)pc_addr);

RubyStackAddress ruby_stack_address = {};

if (cfp > state->base_stack) {
LOG("[debug] done reading stack");
break;
Expand All @@ -244,45 +228,10 @@ int read_ruby_stack(struct bpf_perf_event_data *ctx) {
if ((void *)iseq_addr == NULL) {
// this could be a native frame, it's missing the check though
// https://github.com/ruby/ruby/blob/4ff3f20/.gdbinit#L1155
ruby_stack_address.iseq_addr = NATIVE_METHOD_MARKER;
ruby_stack_address.pc = NATIVE_METHOD_MARKER;
} else {
ruby_stack_address.iseq_addr = iseq_addr;
ruby_stack_address.pc = pc;
}

unsigned long long offset = rb_frame_count + state->rb_frame_count;
if (offset >= 0 && offset < MAX_STACK) {
ruby_stack_addresses->ruby_stack_address[offset] = ruby_stack_address;
}

rb_frame_count += 1;
cfp += control_frame_t_sizeof;
}

state->cfp = cfp;

#pragma unroll
for (int i = 0; i < MAX_STACKS_PER_PROGRAM; i++) {
RubyStackAddress ruby_stack_address;
unsigned long long offset = i + state->rb_frame_count;
if (i >= rb_frame_count) {
goto end;
}
if (offset >= 0 && offset < MAX_STACK) {
ruby_stack_address = ruby_stack_addresses->ruby_stack_address[offset];
} else {
// this should never happen
return 0;
}
iseq_addr = ruby_stack_address.iseq_addr;
pc = ruby_stack_address.pc;

if (iseq_addr == NATIVE_METHOD_MARKER && pc == NATIVE_METHOD_MARKER) {
// TODO(javierhonduco): Fetch path for native stacks
bpf_probe_read_kernel_str(current_frame.method_name, sizeof(NATIVE_METHOD_NAME), NATIVE_METHOD_NAME);
} else {
rbperf_read(&body, 8, (void *)(iseq_addr + body_offset));
// add check
read_frame(pc, body, &current_frame, version_offsets);
}

Expand All @@ -291,9 +240,11 @@ int read_ruby_stack(struct bpf_perf_event_data *ctx) {
state->stack.frames[actual_index] = find_or_insert_frame(&current_frame);
state->stack.size += 1;
}

cfp += control_frame_t_sizeof;
}
end:
state->rb_frame_count += rb_frame_count;

state->cfp = cfp;
state->base_stack = base_stack;

if (cfp <= base_stack &&
Expand Down Expand Up @@ -416,7 +367,6 @@ int on_event(struct bpf_perf_event_data *ctx) {
state->base_stack = base_stack;
state->cfp = cfp + version_offsets->control_frame_t_sizeof;
state->ruby_stack_program_count = 0;
state->rb_frame_count = 0;
state->rb_version = process_data->rb_version;

bpf_tail_call(ctx, &programs, RBPERF_STACK_READING_PROGRAM_IDX);
Expand Down
13 changes: 1 addition & 12 deletions src/bpf/rbperf.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@
#define SYSCALL_NR_OFFSET 8
#define SYSCALL_NR_SIZE 4

u64 NATIVE_METHOD_MARKER = 0xFABADA;
static char NATIVE_METHOD_NAME[] = "<native code>";

enum ruby_stack_status {
Expand Down Expand Up @@ -90,7 +89,6 @@ typedef struct {
u64 base_stack;
u64 cfp;
int ruby_stack_program_count;
long long int rb_frame_count;
int rb_version;
} SampleState;

Expand All @@ -115,13 +113,4 @@ typedef struct {
int lineno_offset;
int main_thread_offset;
int ec_offset;
} RubyVersionOffsets;

typedef struct {
u64 iseq_addr;
u64 pc;
} RubyStackAddress;

typedef struct {
RubyStackAddress ruby_stack_address[MAX_STACK];
} RubyStackAddresses;
} RubyVersionOffsets;
2 changes: 1 addition & 1 deletion src/rbperf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ impl<'a> Rbperf<'a> {

// Insert Ruby stack reading program
let idx: i32 = RBPERF_STACK_READING_PROGRAM_IDX.try_into().unwrap();
let val = self.bpf.obj.prog("read_ruby_stack").unwrap().fd();
let val = self.bpf.obj.prog("walk_ruby_stack").unwrap().fd();

let mut maps = self.bpf.maps_mut();
let programs = maps.programs();
Expand Down

0 comments on commit 6f4f78c

Please sign in to comment.