Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Browse files Browse the repository at this point in the history
Daniel Borkmann says:

====================
pull-request: bpf 2020-12-28

The following pull-request contains BPF updates for your *net* tree.

There is a small merge conflict between bpf tree commit 69ca310
("bpf: Save correct stopping point in file seq iteration") and net tree
commit 66ed594 ("bpf/task_iter: In task_file_seq_get_next use
task_lookup_next_fd_rcu"). The get_files_struct() does not exist anymore
in net, so take the hunk in HEAD and add the `info->tid = curr_tid` to
the error path:

  [...]
                curr_task = task_seq_get_next(ns, &curr_tid, true);
                if (!curr_task) {
                        info->task = NULL;
                        info->tid = curr_tid;
                        return NULL;
                }

                /* set info->task and info->tid */
  [...]

We've added 10 non-merge commits during the last 9 day(s) which contain
a total of 11 files changed, 75 insertions(+), 20 deletions(-).

The main changes are:

1) Various AF_XDP fixes such as fill/completion ring leak on failed bind and
   fixing a race in skb mode's backpressure mechanism, from Magnus Karlsson.

2) Fix latency spikes on lockdep enabled kernels by adding a rescheduling
   point to BPF hashtab initialization, from Eric Dumazet.

3) Fix a splat in task iterator by saving the correct stopping point in the
   seq file iteration, from Jonathan Lemon.

4) Fix BPF maps selftest by adding retries in case hashtab returns EBUSY
   errors on update/deletes, from Andrii Nakryiko.

5) Fix BPF selftest error reporting to something more user friendly if the
   vmlinux BTF cannot be found, from Kamal Mostafa.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
davem330 committed Dec 28, 2020
2 parents 1fef735 + a61daaf commit 4bfc471
Show file tree
Hide file tree
Showing 11 changed files with 81 additions and 27 deletions.
4 changes: 0 additions & 4 deletions include/net/xdp_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,6 @@ struct xdp_sock {

struct xsk_queue *tx ____cacheline_aligned_in_smp;
struct list_head tx_list;
/* Mutual exclusion of NAPI TX thread and sendmsg error paths
* in the SKB destructor callback.
*/
spinlock_t tx_completion_lock;
/* Protects generic receive. */
spinlock_t rx_lock;

Expand Down
5 changes: 5 additions & 0 deletions include/net/xsk_buff_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ struct xsk_buff_pool {
bool dma_need_sync;
bool unaligned;
void *addrs;
/* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect:
* (1) the NAPI TX thread and the sendmsg error paths in the SKB destructor callback,
* and (2) sockets that share a single cq because they use the same netdev and queue id.
*/
spinlock_t cq_lock;
struct xdp_buff_xsk *free_heads[];
};

Expand Down
1 change: 1 addition & 0 deletions kernel/bpf/hashtab.c
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ static void htab_init_buckets(struct bpf_htab *htab)
lockdep_set_class(&htab->buckets[i].lock,
&htab->lockdep_key);
}
cond_resched();
}
}

Expand Down
1 change: 0 additions & 1 deletion kernel/bpf/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
Expand Down
18 changes: 9 additions & 9 deletions kernel/bpf/task_iter.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
if (!task) {
++*tid;
goto retry;
} else if (skip_if_dup_files && task->tgid != task->pid &&
} else if (skip_if_dup_files && !thread_group_leader(task) &&
task->files == task->group_leader->files) {
put_task_struct(task);
task = NULL;
Expand Down Expand Up @@ -151,14 +151,14 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
curr_task = info->task;
curr_fd = info->fd;
} else {
curr_task = task_seq_get_next(ns, &curr_tid, true);
if (!curr_task) {
info->task = NULL;
return NULL;
}

/* set info->task and info->tid */
info->task = curr_task;
curr_task = task_seq_get_next(ns, &curr_tid, true);
if (!curr_task) {
info->task = NULL;
info->tid = curr_tid;
return NULL;
}

/* set info->task and info->tid */
if (curr_tid == info->tid) {
curr_fd = info->fd;
} else {
Expand Down
16 changes: 13 additions & 3 deletions net/xdp/xsk.c
Original file line number Diff line number Diff line change
Expand Up @@ -423,9 +423,9 @@ static void xsk_destruct_skb(struct sk_buff *skb)
struct xdp_sock *xs = xdp_sk(skb->sk);
unsigned long flags;

spin_lock_irqsave(&xs->tx_completion_lock, flags);
spin_lock_irqsave(&xs->pool->cq_lock, flags);
xskq_prod_submit_addr(xs->pool->cq, addr);
spin_unlock_irqrestore(&xs->tx_completion_lock, flags);
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);

sock_wfree(skb);
}
Expand All @@ -437,6 +437,7 @@ static int xsk_generic_xmit(struct sock *sk)
bool sent_frame = false;
struct xdp_desc desc;
struct sk_buff *skb;
unsigned long flags;
int err = 0;

mutex_lock(&xs->mutex);
Expand Down Expand Up @@ -468,10 +469,13 @@ static int xsk_generic_xmit(struct sock *sk)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
spin_lock_irqsave(&xs->pool->cq_lock, flags);
if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
kfree_skb(skb);
goto out;
}
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);

skb->dev = xs->dev;
skb->priority = sk->sk_priority;
Expand All @@ -483,6 +487,9 @@ static int xsk_generic_xmit(struct sock *sk)
if (err == NETDEV_TX_BUSY) {
/* Tell user-space to retry the send */
skb->destructor = sock_wfree;
spin_lock_irqsave(&xs->pool->cq_lock, flags);
xskq_prod_cancel(xs->pool->cq);
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
/* Free skb without triggering the perf drop trace */
consume_skb(skb);
err = -EAGAIN;
Expand Down Expand Up @@ -878,6 +885,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
}
}

/* FQ and CQ are now owned by the buffer pool and cleaned up with it. */
xs->fq_tmp = NULL;
xs->cq_tmp = NULL;

xs->dev = dev;
xs->zc = xs->umem->zc;
xs->queue_id = qid;
Expand Down Expand Up @@ -1299,7 +1310,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
xs->state = XSK_READY;
mutex_init(&xs->mutex);
spin_lock_init(&xs->rx_lock);
spin_lock_init(&xs->tx_completion_lock);

INIT_LIST_HEAD(&xs->map_list);
spin_lock_init(&xs->map_list_lock);
Expand Down
3 changes: 1 addition & 2 deletions net/xdp/xsk_buff_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,11 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
spin_lock_init(&pool->xsk_tx_list_lock);
spin_lock_init(&pool->cq_lock);
refcount_set(&pool->users, 1);

pool->fq = xs->fq_tmp;
pool->cq = xs->cq_tmp;
xs->fq_tmp = NULL;
xs->cq_tmp = NULL;

for (i = 0; i < pool->free_heads_cnt; i++) {
xskb = &pool->heads[i];
Expand Down
5 changes: 5 additions & 0 deletions net/xdp/xsk_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,11 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q)
return xskq_prod_nb_free(q, 1) ? false : true;
}

/* Move the queue's cached producer index back by one entry.
 *
 * NOTE(review): presumably the counterpart of xskq_prod_reserve(), used for a
 * slot that was reserved but will not be submitted - confirm against callers.
 */
static inline void xskq_prod_cancel(struct xsk_queue *q)
{
	q->cached_prod -= 1;
}

static inline int xskq_prod_reserve(struct xsk_queue *q)
{
if (xskq_prod_is_full(q))
Expand Down
3 changes: 3 additions & 0 deletions tools/testing/selftests/bpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
/sys/kernel/btf/vmlinux \
/boot/vmlinux-$(shell uname -r)
VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
ifeq ($(VMLINUX_BTF),)
$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
endif

# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
# to build individual tests.
Expand Down
48 changes: 42 additions & 6 deletions tools/testing/selftests/bpf/test_maps.c
Original file line number Diff line number Diff line change
Expand Up @@ -1312,22 +1312,58 @@ static void test_map_stress(void)
#define DO_UPDATE 1
#define DO_DELETE 0

#define MAP_RETRIES 20

/* Call bpf_map_update_elem() until it succeeds, retrying only while the call
 * fails with errno set to EAGAIN or EBUSY.
 *
 * @attempts is the number of retries permitted after a failed call; each
 * retry is preceded by a usleep(1).
 *
 * Returns 0 on success, or -errno of the failing call once @attempts has
 * reached zero or errno is anything other than EAGAIN/EBUSY.
 */
static int map_update_retriable(int map_fd, const void *key, const void *value,
				int flags, int attempts)
{
	for (;;) {
		int transient;

		if (bpf_map_update_elem(map_fd, key, value, flags) == 0)
			return 0;

		transient = (errno == EAGAIN || errno == EBUSY);
		if (attempts == 0 || !transient)
			return -errno;

		attempts--;
		usleep(1);
	}
}

/* Call bpf_map_delete_elem() until it succeeds, retrying only while the call
 * fails with errno set to EAGAIN or EBUSY.
 *
 * @attempts is the number of retries permitted after a failed call; each
 * retry is preceded by a usleep(1).
 *
 * Returns 0 on success, or -errno of the failing call once @attempts has
 * reached zero or errno is anything other than EAGAIN/EBUSY.
 */
static int map_delete_retriable(int map_fd, const void *key, int attempts)
{
	for (;;) {
		int transient;

		if (bpf_map_delete_elem(map_fd, key) == 0)
			return 0;

		transient = (errno == EAGAIN || errno == EBUSY);
		if (attempts == 0 || !transient)
			return -errno;

		attempts--;
		usleep(1);
	}
}

/* Worker body: for every key i = fn, fn + TASKS, ... below MAP_SIZE, either
 * insert and then overwrite the element (do_update != 0) or delete it.
 *
 * @fn:   first key handled by this worker (presumably the task index -
 *        confirm against the caller, which is not visible here).
 * @data: int array; [0] is the map fd, [1] selects update vs. delete.
 *
 * NOTE(review): this listing comes from a rendered diff and appears to show
 * removed and added lines back to back (the two declarations of i/key/value
 * below, and the bare bpf_map_*_elem() asserts followed by their
 * *_retriable() replacements) - confirm against the actual tree before
 * treating it as compilable code.
 */
static void test_update_delete(unsigned int fn, void *data)
{
int do_update = ((int *)data)[1];
int fd = ((int *)data)[0];
int i, key, value;
int i, key, value, err;

/* Stride by TASKS starting at fn; key and value are both set to i. */
for (i = fn; i < MAP_SIZE; i += TASKS) {
key = value = i;

if (do_update) {
assert(bpf_map_update_elem(fd, &key, &value,
BPF_NOEXIST) == 0);
assert(bpf_map_update_elem(fd, &key, &value,
BPF_EXIST) == 0);
/* Create the element, retrying up to MAP_RETRIES times. */
err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
/* Overwrite the element that was just created. */
err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
} else {
assert(bpf_map_delete_elem(fd, &key) == 0);
/* Delete the element, retrying up to MAP_RETRIES times. */
err = map_delete_retriable(fd, &key, MAP_RETRIES);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions tools/testing/selftests/bpf/xdpxceiver.c
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,7 @@ static void worker_pkt_dump(void)
int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE));

if (payload == EOT) {
ksft_print_msg("End-of-tranmission frame received\n");
ksft_print_msg("End-of-transmission frame received\n");
fprintf(stdout, "---------------------------------------\n");
break;
}
Expand Down Expand Up @@ -747,7 +747,7 @@ static void worker_pkt_validate(void)
}

if (payloadseqnum == EOT) {
ksft_print_msg("End-of-tranmission frame received: PASS\n");
ksft_print_msg("End-of-transmission frame received: PASS\n");
sigvar = 1;
break;
}
Expand Down

0 comments on commit 4bfc471

Please sign in to comment.