Skip to content

Commit

Permalink
bpf: introduce bpf_perf_event_output() helper
Browse files Browse the repository at this point in the history
This helper is used to send raw data from an eBPF program into a
special PERF_TYPE_SOFTWARE/PERF_COUNT_SW_BPF_OUTPUT perf_event.
User space needs to perf_event_open() it (either for one or all cpus) and
store the FD into a perf_event_array (similar to the bpf_perf_event_read() helper)
before the eBPF program can send data into it.

Today the programs triggered by kprobes collect the data and either store
it into maps or print it via bpf_trace_printk(), where the latter is a debug
facility and not suitable for streaming the data. This new helper replaces
such bpf_trace_printk() usage and allows programs to have a dedicated
channel into user space for post-processing of the raw data collected.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Alexei Starovoitov authored and davem330 committed Oct 22, 2015
1 parent fa128e6 commit a43eec3
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 1 deletion.
11 changes: 11 additions & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,17 @@ enum bpf_func_id {
* Return: realm if != 0
*/
BPF_FUNC_get_route_realm,

/**
* bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample
* @ctx: struct pt_regs*
* @map: pointer to perf_event_array map
* @index: index of event in the map
* @data: data on stack to be output as raw data
* @size: size of data
* Return: 0 on success
*/
BPF_FUNC_perf_event_output,
__BPF_FUNC_MAX_ID,
};

Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ enum perf_sw_ids {
PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
PERF_COUNT_SW_EMULATION_FAULTS = 8,
PERF_COUNT_SW_DUMMY = 9,
PERF_COUNT_SW_BPF_OUTPUT = 10,

PERF_COUNT_SW_MAX, /* non-ABI */
};
Expand Down
2 changes: 2 additions & 0 deletions kernel/bpf/arraymap.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,8 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
return (void *)attr;

if (attr->type != PERF_TYPE_RAW &&
!(attr->type == PERF_TYPE_SOFTWARE &&
attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
attr->type != PERF_TYPE_HARDWARE) {
perf_event_release_kernel(event);
return ERR_PTR(-EINVAL);
Expand Down
3 changes: 2 additions & 1 deletion kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ static const struct {
} func_limit[] = {
{BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output},
};

static void print_verifier_state(struct verifier_env *env)
Expand Down Expand Up @@ -910,7 +911,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
* don't allow any other map type to be passed into
* the special func;
*/
if (bool_map != bool_func)
if (bool_func && bool_map != bool_func)
return -EINVAL;
}

Expand Down
46 changes: 46 additions & 0 deletions kernel/trace/bpf_trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,50 @@ const struct bpf_func_proto bpf_perf_event_read_proto = {
.arg2_type = ARG_ANYTHING,
};

/*
 * bpf_perf_event_output() - hand a raw record from a BPF program to a
 * perf event stored in an array map.
 * @r1:    context value, used as the struct pt_regs pointer for the sample
 * @r2:    pointer to the map; treated as the 'map' member of a struct bpf_array
 * @index: slot in the array whose perf event receives the record
 * @r4:    address of the data that becomes the raw record payload
 * @size:  size of the raw record payload
 *
 * Return: 0 after the sample was passed to perf_event_output();
 *         -E2BIG if @index is not below the map's max_entries;
 *         -ENOENT if the selected slot holds no event;
 *         -EINVAL if the event is not of type PERF_TYPE_SOFTWARE with
 *         config PERF_COUNT_SW_BPF_OUTPUT;
 *         -EOPNOTSUPP if event->oncpu differs from smp_processor_id().
 */
static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
{
	struct bpf_map *bmap = (struct bpf_map *) (long) r2;
	struct bpf_array *array = container_of(bmap, struct bpf_array, map);
	struct perf_raw_record raw = {
		.size = size,
		.data = (void *) (long) r4,
	};
	struct perf_sample_data sample_data;
	struct perf_event *event;

	/* Reject indices that fall outside the array map. */
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

	/* An empty slot means no event was stored at this index. */
	event = (struct perf_event *)array->ptrs[index];
	if (unlikely(!event))
		return -ENOENT;

	/* Only the software BPF output event may be used as a target. */
	if (unlikely(!(event->attr.type == PERF_TYPE_SOFTWARE &&
		       event->attr.config == PERF_COUNT_SW_BPF_OUTPUT)))
		return -EINVAL;

	/* The event has to be the one running on the CPU we execute on. */
	if (unlikely(event->oncpu != smp_processor_id()))
		return -EOPNOTSUPP;

	perf_sample_data_init(&sample_data, 0, 0);
	sample_data.raw = &raw;
	perf_event_output(event, &sample_data, (struct pt_regs *) (long) r1);

	return 0;
}

/*
 * Prototype descriptor for the bpf_perf_event_output() helper. The five
 * argument types below correspond, in order, to the helper's
 * (ctx, map, index, data, size) parameters.
 */
static const struct bpf_func_proto bpf_perf_event_output_proto = {
.func = bpf_perf_event_output,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX, /* r1: context, cast to struct pt_regs * by the helper */
.arg2_type = ARG_CONST_MAP_PTR, /* r2: the map pointer */
.arg3_type = ARG_ANYTHING, /* index: range-checked by the helper itself */
.arg4_type = ARG_PTR_TO_STACK, /* r4: raw data to output */
.arg5_type = ARG_CONST_STACK_SIZE, /* size: length of the raw data */
};

static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
Expand Down Expand Up @@ -242,6 +286,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
return &bpf_get_smp_processor_id_proto;
case BPF_FUNC_perf_event_read:
return &bpf_perf_event_read_proto;
case BPF_FUNC_perf_event_output:
return &bpf_perf_event_output_proto;
default:
return NULL;
}
Expand Down

0 comments on commit a43eec3

Please sign in to comment.