Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Browse files Browse the repository at this point in the history
Daniel Borkmann says:

====================
pull-request: bpf 2021-07-09

The following pull-request contains BPF updates for your *net* tree.

We've added 9 non-merge commits during the last 9 day(s) which contain
a total of 13 files changed, 118 insertions(+), 62 deletions(-).

The main changes are:

1) Fix runqslower task->state access from BPF, from SanjayKumar Jeyakumar.

2) Fix subprog poke descriptor tracking use-after-free, from John Fastabend.

3) Fix sparse complaint from prior devmap RCU conversion, from Toke Høiland-Jørgensen.

4) Fix missing va_end in bpftool JIT json dump's error path, from Gu Shengxian.

5) Fix tools/bpf install target from missing runqslower install, from Wei Li.

6) Fix xdpsock BPF sample to unload program on shared umem option, from Wang Hai.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
davem330 committed Jul 9, 2021
2 parents 67a9c94 + 1fb5ba2 commit 5d52c90
Show file tree
Hide file tree
Showing 13 changed files with 118 additions and 62 deletions.
3 changes: 3 additions & 0 deletions arch/x86/net/bpf_jit_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,9 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)

for (i = 0; i < prog->aux->size_poke_tab; i++) {
poke = &prog->aux->poke_tab[i];
if (poke->aux && poke->aux != prog->aux)
continue;

WARN_ON_ONCE(READ_ONCE(poke->tailcall_target_stable));

if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
Expand Down
1 change: 1 addition & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,7 @@ struct bpf_jit_poke_descriptor {
void *tailcall_target;
void *tailcall_bypass;
void *bypass_addr;
void *aux;
union {
struct {
struct bpf_map *map;
Expand Down
8 changes: 7 additions & 1 deletion kernel/bpf/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2236,8 +2236,14 @@ static void bpf_prog_free_deferred(struct work_struct *work)
#endif
if (aux->dst_trampoline)
bpf_trampoline_put(aux->dst_trampoline);
for (i = 0; i < aux->func_cnt; i++)
for (i = 0; i < aux->func_cnt; i++) {
/* We can just unlink the subprog poke descriptor table as
* it was originally linked to the main program and is also
* released along with it.
*/
aux->func[i]->aux->poke_tab = NULL;
bpf_jit_free(aux->func[i]);
}
if (aux->func_cnt) {
kfree(aux->func);
bpf_prog_unlock_free(aux->prog);
Expand Down
6 changes: 4 additions & 2 deletions kernel/bpf/devmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,8 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,

if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
for (i = 0; i < map->max_entries; i++) {
dst = READ_ONCE(dtab->netdev_map[i]);
dst = rcu_dereference_check(dtab->netdev_map[i],
rcu_read_lock_bh_held());
if (!is_valid_dst(dst, xdp, exclude_ifindex))
continue;

Expand Down Expand Up @@ -654,7 +655,8 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,

if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
for (i = 0; i < map->max_entries; i++) {
dst = READ_ONCE(dtab->netdev_map[i]);
dst = rcu_dereference_check(dtab->netdev_map[i],
rcu_read_lock_bh_held());
if (!dst || dst->dev->ifindex == exclude_ifindex)
continue;

Expand Down
60 changes: 21 additions & 39 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -12121,33 +12121,19 @@ static int jit_subprogs(struct bpf_verifier_env *env)
goto out_free;
func[i]->is_func = 1;
func[i]->aux->func_idx = i;
/* the btf and func_info will be freed only at prog->aux */
/* Below members will be freed only at prog->aux */
func[i]->aux->btf = prog->aux->btf;
func[i]->aux->func_info = prog->aux->func_info;
func[i]->aux->poke_tab = prog->aux->poke_tab;
func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;

for (j = 0; j < prog->aux->size_poke_tab; j++) {
u32 insn_idx = prog->aux->poke_tab[j].insn_idx;
int ret;
struct bpf_jit_poke_descriptor *poke;

if (!(insn_idx >= subprog_start &&
insn_idx <= subprog_end))
continue;

ret = bpf_jit_add_poke_descriptor(func[i],
&prog->aux->poke_tab[j]);
if (ret < 0) {
verbose(env, "adding tail call poke descriptor failed\n");
goto out_free;
}

func[i]->insnsi[insn_idx - subprog_start].imm = ret + 1;

map_ptr = func[i]->aux->poke_tab[ret].tail_call.map;
ret = map_ptr->ops->map_poke_track(map_ptr, func[i]->aux);
if (ret < 0) {
verbose(env, "tracking tail call prog failed\n");
goto out_free;
}
poke = &prog->aux->poke_tab[j];
if (poke->insn_idx < subprog_end &&
poke->insn_idx >= subprog_start)
poke->aux = func[i]->aux;
}

/* Use bpf_prog_F_tag to indicate functions in stack traces.
Expand Down Expand Up @@ -12178,18 +12164,6 @@ static int jit_subprogs(struct bpf_verifier_env *env)
cond_resched();
}

/* Untrack main program's aux structs so that during map_poke_run()
* we will not stumble upon the unfilled poke descriptors; each
* of the main program's poke descs got distributed across subprogs
* and got tracked onto map, so we are sure that none of them will
* be missed after the operation below
*/
for (i = 0; i < prog->aux->size_poke_tab; i++) {
map_ptr = prog->aux->poke_tab[i].tail_call.map;

map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
}

/* at this point all bpf functions were successfully JITed
* now populate all bpf_calls with correct addresses and
* run last pass of JIT
Expand Down Expand Up @@ -12267,14 +12241,22 @@ static int jit_subprogs(struct bpf_verifier_env *env)
bpf_prog_jit_attempt_done(prog);
return 0;
out_free:
/* We failed JIT'ing, so at this point we need to unregister poke
* descriptors from subprogs, so that kernel is not attempting to
* patch it anymore as we're freeing the subprog JIT memory.
*/
for (i = 0; i < prog->aux->size_poke_tab; i++) {
map_ptr = prog->aux->poke_tab[i].tail_call.map;
map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
}
/* At this point we're guaranteed that poke descriptors are not
* live anymore. We can just unlink its descriptor table as it's
* released with the main prog.
*/
for (i = 0; i < env->subprog_cnt; i++) {
if (!func[i])
continue;

for (j = 0; j < func[i]->aux->size_poke_tab; j++) {
map_ptr = func[i]->aux->poke_tab[j].tail_call.map;
map_ptr->ops->map_poke_untrack(map_ptr, func[i]->aux);
}
func[i]->aux->poke_tab = NULL;
bpf_jit_free(func[i]);
}
kfree(func);
Expand Down
1 change: 1 addition & 0 deletions samples/bpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,7 @@ $(obj)/%.o: $(src)/%.c
-Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \
-Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
-fno-asynchronous-unwind-tables \
-I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \
-O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \
$(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \
Expand Down
28 changes: 28 additions & 0 deletions samples/bpf/xdpsock_user.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
static int opt_timeout = 1000;
static bool opt_need_wakeup = true;
static u32 opt_num_xsks = 1;
static u32 prog_id;
static bool opt_busy_poll;
static bool opt_reduced_cap;

Expand Down Expand Up @@ -461,6 +462,23 @@ static void *poller(void *arg)
return NULL;
}

/*
 * Remove the XDP program from opt_ifindex, but only when the program that is
 * attached right now is still the one whose id was recorded in prog_id.
 *
 * Exits the process with EXIT_FAILURE if the currently attached program id
 * cannot be queried. If a different program (or none) is attached, nothing is
 * removed and a diagnostic is printed instead.
 */
static void remove_xdp_program(void)
{
	u32 attached_id = 0;
	int err;

	err = bpf_get_link_xdp_id(opt_ifindex, &attached_id, opt_xdp_flags);
	if (err) {
		printf("bpf_get_link_xdp_id failed\n");
		exit(EXIT_FAILURE);
	}

	if (attached_id == prog_id) {
		/*
		 * Ids match, so the interface still carries the recorded
		 * program. NOTE(review): an fd of -1 is presumably the libbpf
		 * convention for "detach" - confirm against the libbpf docs.
		 */
		bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
		return;
	}

	if (attached_id == 0) {
		printf("couldn't find a prog id on a given interface\n");
		return;
	}

	/* Some other program is attached; leave it alone. */
	printf("program on interface changed, not removing\n");
}

static void int_exit(int sig)
{
benchmark_done = true;
Expand All @@ -471,6 +489,9 @@ static void __exit_with_error(int error, const char *file, const char *func,
{
fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func,
line, error, strerror(error));

if (opt_num_xsks > 1)
remove_xdp_program();
exit(EXIT_FAILURE);
}

Expand All @@ -490,6 +511,9 @@ static void xdpsock_cleanup(void)
if (write(sock, &cmd, sizeof(int)) < 0)
exit_with_error(errno);
}

if (opt_num_xsks > 1)
remove_xdp_program();
}

static void swap_mac_addresses(void *data)
Expand Down Expand Up @@ -857,6 +881,10 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
if (ret)
exit_with_error(-ret);

ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags);
if (ret)
exit_with_error(-ret);

xsk->app_stats.rx_empty_polls = 0;
xsk->app_stats.fill_fail_polls = 0;
xsk->app_stats.copy_tx_sendtos = 0;
Expand Down
7 changes: 2 additions & 5 deletions tools/bpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ clean: bpftool_clean runqslower_clean resolve_btfids_clean
$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpf
$(Q)$(RM) -r -- $(OUTPUT)feature

install: $(PROGS) bpftool_install runqslower_install
install: $(PROGS) bpftool_install
$(call QUIET_INSTALL, bpf_jit_disasm)
$(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin
$(Q)$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm
Expand All @@ -118,9 +118,6 @@ bpftool_clean:
runqslower:
$(call descend,runqslower)

runqslower_install:
$(call descend,runqslower,install)

runqslower_clean:
$(call descend,runqslower,clean)

Expand All @@ -131,5 +128,5 @@ resolve_btfids_clean:
$(call descend,resolve_btfids,clean)

.PHONY: all install clean bpftool bpftool_install bpftool_clean \
runqslower runqslower_install runqslower_clean \
runqslower runqslower_clean \
resolve_btfids resolve_btfids_clean
6 changes: 4 additions & 2 deletions tools/bpf/bpftool/jit_disasm.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,13 @@ static int fprintf_json(void *out, const char *fmt, ...)
{
va_list ap;
char *s;
int err;

va_start(ap, fmt);
if (vasprintf(&s, fmt, ap) < 0)
return -1;
err = vasprintf(&s, fmt, ap);
va_end(ap);
if (err < 0)
return -1;

if (!oper_count) {
int i;
Expand Down
2 changes: 1 addition & 1 deletion tools/bpf/runqslower/runqslower.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ int handle__sched_switch(u64 *ctx)
u32 pid;

/* ivcsw: treat like an enqueue event and store timestamp */
if (prev->state == TASK_RUNNING)
if (prev->__state == TASK_RUNNING)
trace_enqueue(prev);

pid = next->pid;
Expand Down
4 changes: 2 additions & 2 deletions tools/lib/bpf/libbpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -10136,7 +10136,7 @@ int bpf_link__unpin(struct bpf_link *link)

err = unlink(link->pin_path);
if (err != 0)
return libbpf_err_errno(err);
return -errno;

pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
zfree(&link->pin_path);
Expand Down Expand Up @@ -11197,7 +11197,7 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)

cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
if (cnt < 0)
return libbpf_err_errno(cnt);
return -errno;

for (i = 0; i < cnt; i++) {
struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
Expand Down
36 changes: 26 additions & 10 deletions tools/testing/selftests/bpf/prog_tests/tailcalls.c
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,8 @@ static void test_tailcall_bpf2bpf_3(void)
bpf_object__close(obj);
}

#include "tailcall_bpf2bpf4.skel.h"

/* test_tailcall_bpf2bpf_4 checks that tailcall counter is correctly preserved
* across tailcalls combined with bpf2bpf calls. for making sure that tailcall
* counter behaves correctly, bpf program will go through following flow:
Expand All @@ -727,10 +729,15 @@ static void test_tailcall_bpf2bpf_3(void)
* the loop begins. At the end of the test make sure that the global counter is
* equal to 31, because tailcall counter includes the first two tailcalls
* whereas global counter is incremented only on loop presented on flow above.
*
* The noise parameter is used to insert bpf_map_lookup_elem calls into the
* logic to force the verifier to patch instructions. This allows us to ensure
* jump logic remains correct with instruction movement.
*/
static void test_tailcall_bpf2bpf_4(void)
static void test_tailcall_bpf2bpf_4(bool noise)
{
int err, map_fd, prog_fd, main_fd, data_fd, i, val;
int err, map_fd, prog_fd, main_fd, data_fd, i;
struct tailcall_bpf2bpf4__bss val;
struct bpf_map *prog_array, *data_map;
struct bpf_program *prog;
struct bpf_object *obj;
Expand Down Expand Up @@ -774,11 +781,6 @@ static void test_tailcall_bpf2bpf_4(void)
goto out;
}

err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
&duration, &retval, NULL);
CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n",
err, errno, retval);

data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
return;
Expand All @@ -787,10 +789,22 @@ static void test_tailcall_bpf2bpf_4(void)
if (CHECK_FAIL(map_fd < 0))
return;

i = 0;
val.noise = noise;
val.count = 0;
err = bpf_map_update_elem(data_fd, &i, &val, BPF_ANY);
if (CHECK_FAIL(err))
goto out;

err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
&duration, &retval, NULL);
CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n",
err, errno, retval);

i = 0;
err = bpf_map_lookup_elem(data_fd, &i, &val);
CHECK(err || val != 31, "tailcall count", "err %d errno %d count %d\n",
err, errno, val);
CHECK(err || val.count != 31, "tailcall count", "err %d errno %d count %d\n",
err, errno, val.count);

out:
bpf_object__close(obj);
Expand All @@ -815,5 +829,7 @@ void test_tailcalls(void)
if (test__start_subtest("tailcall_bpf2bpf_3"))
test_tailcall_bpf2bpf_3();
if (test__start_subtest("tailcall_bpf2bpf_4"))
test_tailcall_bpf2bpf_4();
test_tailcall_bpf2bpf_4(false);
if (test__start_subtest("tailcall_bpf2bpf_5"))
test_tailcall_bpf2bpf_4(true);
}
18 changes: 18 additions & 0 deletions tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/*
 * Single-entry array map whose key and value are both sized as __u32.
 * In this file it is only used as the target of the map lookup issued by
 * subprog_noise().
 */
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 1);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u32));
} nop_table SEC(".maps");

struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
__uint(max_entries, 3);
Expand All @@ -10,10 +17,21 @@ struct {
} jmp_table SEC(".maps");

/* NOTE(review): presumably the counter the userspace test reads back and
 * compares against 31 - its updates are outside this view; confirm.
 */
int count = 0;
/* When non-zero, subprog_tail_2() calls subprog_noise() before its tail call. */
int noise = 0;

/*
 * Filler helper: issues one lookup in nop_table with key 0 and throws the
 * result away. The call exists only for its presence in the instruction
 * stream, not for the value it returns.
 *
 * Declared static so that this inline definition has internal linkage and
 * never depends on an external definition being provided elsewhere.
 *
 * Always returns 0.
 */
static __always_inline int subprog_noise(void)
{
	__u32 key = 0;

	/* Return value intentionally ignored; only the call itself matters. */
	bpf_map_lookup_elem(&nop_table, &key);
	return 0;
}

/*
 * Non-inlined subprogram that attempts a tail call through slot 2 of
 * jmp_table. When the global `noise` flag is set, it first runs
 * subprog_noise() so that an extra map lookup precedes the tail call.
 *
 * Returns skb->len * 3 when execution continues past the tail call.
 */
__noinline
int subprog_tail_2(struct __sk_buff *skb)
{
/* Optional filler map lookup ahead of the tail call. */
if (noise)
subprog_noise();
bpf_tail_call_static(skb, &jmp_table, 2);
/* NOTE(review): presumably reached only when the tail call does not
 * transfer control - confirm against the bpf_tail_call semantics.
 */
return skb->len * 3;
}
Expand Down

0 comments on commit 5d52c90

Please sign in to comment.