Skip to content

Commit

Permalink
Merge branch 'Sleepable local storage'
Browse files Browse the repository at this point in the history
KP Singh says:

====================

Local storage is currently unusable in sleepable helpers. One of the
important use cases of local_storage is to attach security (or
performance) contextual information to kernel objects in LSM / tracing
programs to be used later in the life-cyle of the object.

Sometimes this context can only be gathered from sleepable programs
(because it needs accesing __user pointers or helpers like
bpf_ima_inode_hash). Allowing local storage to be used from sleepable
programs allows such context to be managed with the benefits of
local_storage.

# v2 -> v3

* Fixed some RCU issues pointed by Martin
* Added Martin's ack

# v1 -> v2

* Generalize RCU checks (will send a separate patch for updating
  non local storage code where this can be used).
* Add missing RCU lock checks from v1
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Alexei Starovoitov committed Dec 30, 2021
2 parents 3ccdcee + 0ae6eff commit 1705c62
Show file tree
Hide file tree
Showing 8 changed files with 73 additions and 49 deletions.
5 changes: 5 additions & 0 deletions include/linux/bpf_local_storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@

#define BPF_LOCAL_STORAGE_CACHE_SIZE 16

#define bpf_rcu_lock_held() \
(rcu_read_lock_held() || rcu_read_lock_trace_held() || \
rcu_read_lock_bh_held())
struct bpf_local_storage_map_bucket {
struct hlist_head list;
raw_spinlock_t lock;
Expand Down Expand Up @@ -162,4 +165,6 @@ struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
void *value, u64 map_flags);

void bpf_local_storage_free_rcu(struct rcu_head *rcu);

#endif /* _BPF_LOCAL_STORAGE_H */
6 changes: 5 additions & 1 deletion kernel/bpf/bpf_inode_storage.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>
#include <linux/fdtable.h>
#include <linux/rcupdate_trace.h>

DEFINE_BPF_STORAGE_CACHE(inode_cache);

Expand Down Expand Up @@ -44,7 +45,8 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode,
if (!bsb)
return NULL;

inode_storage = rcu_dereference(bsb->storage);
inode_storage =
rcu_dereference_check(bsb->storage, bpf_rcu_lock_held());
if (!inode_storage)
return NULL;

Expand Down Expand Up @@ -172,6 +174,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
{
struct bpf_local_storage_data *sdata;

WARN_ON_ONCE(!bpf_rcu_lock_held());
if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
return (unsigned long)NULL;

Expand Down Expand Up @@ -204,6 +207,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
BPF_CALL_2(bpf_inode_storage_delete,
struct bpf_map *, map, struct inode *, inode)
{
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (!inode)
return -EINVAL;

Expand Down
50 changes: 37 additions & 13 deletions kernel/bpf/bpf_local_storage.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>

#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)

Expand Down Expand Up @@ -81,6 +84,22 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
return NULL;
}

void bpf_local_storage_free_rcu(struct rcu_head *rcu)
{
struct bpf_local_storage *local_storage;

local_storage = container_of(rcu, struct bpf_local_storage, rcu);
kfree_rcu(local_storage, rcu);
}

static void bpf_selem_free_rcu(struct rcu_head *rcu)
{
struct bpf_local_storage_elem *selem;

selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
kfree_rcu(selem, rcu);
}

/* local_storage->lock must be held and selem->local_storage == local_storage.
* The caller must ensure selem->smap is still valid to be
* dereferenced for its smap->elem_size and smap->cache_idx.
Expand All @@ -93,7 +112,7 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
bool free_local_storage;
void *owner;

smap = rcu_dereference(SDATA(selem)->smap);
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
owner = local_storage->owner;

/* All uncharging on the owner must be done first.
Expand All @@ -118,21 +137,20 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
*
* Although the unlock will be done under
* rcu_read_lock(), it is more intutivie to
* read if kfree_rcu(local_storage, rcu) is done
* read if the freeing of the storage is done
* after the raw_spin_unlock_bh(&local_storage->lock).
*
* Hence, a "bool free_local_storage" is returned
* to the caller which then calls the kfree_rcu()
* after unlock.
* to the caller which then calls then frees the storage after
* all the RCU grace periods have expired.
*/
}
hlist_del_init_rcu(&selem->snode);
if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
SDATA(selem))
RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);

kfree_rcu(selem, rcu);

call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
return free_local_storage;
}

Expand All @@ -146,15 +164,17 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
/* selem has already been unlinked from sk */
return;

local_storage = rcu_dereference(selem->local_storage);
local_storage = rcu_dereference_check(selem->local_storage,
bpf_rcu_lock_held());
raw_spin_lock_irqsave(&local_storage->lock, flags);
if (likely(selem_linked_to_storage(selem)))
free_local_storage = bpf_selem_unlink_storage_nolock(
local_storage, selem, true);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);

if (free_local_storage)
kfree_rcu(local_storage, rcu);
call_rcu_tasks_trace(&local_storage->rcu,
bpf_local_storage_free_rcu);
}

void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
Expand All @@ -174,7 +194,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
/* selem has already be unlinked from smap */
return;

smap = rcu_dereference(SDATA(selem)->smap);
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
b = select_bucket(smap, selem);
raw_spin_lock_irqsave(&b->lock, flags);
if (likely(selem_linked_to_map(selem)))
Expand Down Expand Up @@ -213,12 +233,14 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
struct bpf_local_storage_elem *selem;

/* Fast path (cache hit) */
sdata = rcu_dereference(local_storage->cache[smap->cache_idx]);
sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx],
bpf_rcu_lock_held());
if (sdata && rcu_access_pointer(sdata->smap) == smap)
return sdata;

/* Slow path (cache miss) */
hlist_for_each_entry_rcu(selem, &local_storage->list, snode)
hlist_for_each_entry_rcu(selem, &local_storage->list, snode,
rcu_read_lock_trace_held())
if (rcu_access_pointer(SDATA(selem)->smap) == smap)
break;

Expand Down Expand Up @@ -306,7 +328,8 @@ int bpf_local_storage_alloc(void *owner,
* bucket->list, first_selem can be freed immediately
* (instead of kfree_rcu) because
* bpf_local_storage_map_free() does a
* synchronize_rcu() before walking the bucket->list.
* synchronize_rcu_mult (waiting for both sleepable and
* normal programs) before walking the bucket->list.
* Hence, no one is accessing selem from the
* bucket->list under rcu_read_lock().
*/
Expand Down Expand Up @@ -342,7 +365,8 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
!map_value_has_spin_lock(&smap->map)))
return ERR_PTR(-EINVAL);

local_storage = rcu_dereference(*owner_storage(smap, owner));
local_storage = rcu_dereference_check(*owner_storage(smap, owner),
bpf_rcu_lock_held());
if (!local_storage || hlist_empty(&local_storage->list)) {
/* Very first elem for the owner */
err = check_flags(NULL, map_flags);
Expand Down
6 changes: 5 additions & 1 deletion kernel/bpf/bpf_task_storage.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <uapi/linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/fdtable.h>
#include <linux/rcupdate_trace.h>

DEFINE_BPF_STORAGE_CACHE(task_cache);

Expand Down Expand Up @@ -59,7 +60,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
struct bpf_local_storage *task_storage;
struct bpf_local_storage_map *smap;

task_storage = rcu_dereference(task->bpf_storage);
task_storage =
rcu_dereference_check(task->bpf_storage, bpf_rcu_lock_held());
if (!task_storage)
return NULL;

Expand Down Expand Up @@ -229,6 +231,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
{
struct bpf_local_storage_data *sdata;

WARN_ON_ONCE(!bpf_rcu_lock_held());
if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
return (unsigned long)NULL;

Expand Down Expand Up @@ -260,6 +263,7 @@ BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
{
int ret;

WARN_ON_ONCE(!bpf_rcu_lock_held());
if (!task)
return -EINVAL;

Expand Down
3 changes: 3 additions & 0 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -11874,6 +11874,9 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
}
break;
case BPF_MAP_TYPE_RINGBUF:
case BPF_MAP_TYPE_INODE_STORAGE:
case BPF_MAP_TYPE_SK_STORAGE:
case BPF_MAP_TYPE_TASK_STORAGE:
break;
default:
verbose(env,
Expand Down
8 changes: 7 additions & 1 deletion net/core/bpf_sk_storage.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>

DEFINE_BPF_STORAGE_CACHE(sk_cache);

Expand All @@ -22,7 +23,8 @@ bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
struct bpf_local_storage *sk_storage;
struct bpf_local_storage_map *smap;

sk_storage = rcu_dereference(sk->sk_bpf_storage);
sk_storage =
rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held());
if (!sk_storage)
return NULL;

Expand Down Expand Up @@ -258,6 +260,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
{
struct bpf_local_storage_data *sdata;

WARN_ON_ONCE(!bpf_rcu_lock_held());
if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
return (unsigned long)NULL;

Expand Down Expand Up @@ -288,6 +291,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,

BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
{
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (!sk || !sk_fullsock(sk))
return -EINVAL;

Expand Down Expand Up @@ -416,6 +420,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
void *, value, u64, flags)
{
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (in_hardirq() || in_nmi())
return (unsigned long)NULL;

Expand All @@ -425,6 +430,7 @@ BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
struct sock *, sk)
{
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (in_hardirq() || in_nmi())
return -EPERM;

Expand Down
20 changes: 6 additions & 14 deletions tools/testing/selftests/bpf/prog_tests/test_local_storage.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,6 @@ static unsigned int duration;
struct storage {
void *inode;
unsigned int value;
/* Lock ensures that spin locked versions of local stoage operations
* also work, most operations in this tests are still single threaded
*/
struct bpf_spin_lock lock;
};

/* Fork and exec the provided rm binary and return the exit code of the
Expand Down Expand Up @@ -66,27 +62,24 @@ static int run_self_unlink(int *monitored_pid, const char *rm_path)

static bool check_syscall_operations(int map_fd, int obj_fd)
{
struct storage val = { .value = TEST_STORAGE_VALUE, .lock = { 0 } },
lookup_val = { .value = 0, .lock = { 0 } };
struct storage val = { .value = TEST_STORAGE_VALUE },
lookup_val = { .value = 0 };
int err;

/* Looking up an existing element should fail initially */
err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
BPF_F_LOCK);
err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
"err:%d errno:%d\n", err, errno))
return false;

/* Create a new element */
err = bpf_map_update_elem(map_fd, &obj_fd, &val,
BPF_NOEXIST | BPF_F_LOCK);
err = bpf_map_update_elem(map_fd, &obj_fd, &val, BPF_NOEXIST);
if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err,
errno))
return false;

/* Lookup the newly created element */
err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
BPF_F_LOCK);
err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err,
errno))
return false;
Expand All @@ -102,8 +95,7 @@ static bool check_syscall_operations(int map_fd, int obj_fd)
return false;

/* The lookup should fail, now that the element has been deleted */
err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
BPF_F_LOCK);
err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
"err:%d errno:%d\n", err, errno))
return false;
Expand Down
Loading

0 comments on commit 1705c62

Please sign in to comment.