forked from mirrors/linux
		
	bpf: Fix NULL pointer dereference in bpf_get_local_storage() helper
Jiri Olsa reported a bug ([1]) in kernel where cgroup local
storage pointer may be NULL in bpf_get_local_storage() helper.
There are two issues uncovered by this bug:
  (1). kprobe or tracepoint prog incorrectly sets cgroup local storage
       before prog run,
  (2). due to change from preempt_disable to migrate_disable,
       preemption is possible and percpu storage might be overwritten
       by other tasks.
Issue (1) is fixed in [2]. This patch addresses issue (2).
The following shows how things can go wrong:
  task 1:   bpf_cgroup_storage_set() for percpu local storage
         preemption happens
  task 2:   bpf_cgroup_storage_set() for percpu local storage
         preemption happens
  task 1:   run bpf program
task 1 will effectively use the percpu local storage set by task 2,
which will be either NULL or incorrect.
Instead of just one common local storage per cpu, this patch fixes
the issue by permitting 8 local storages per cpu and each local
storage is identified by a task_struct pointer. This way, we
allow at most 8 nested preemptions between bpf_cgroup_storage_set()
and bpf_cgroup_storage_unset(). The percpu local storage slot
is released (calling bpf_cgroup_storage_unset()) by the same task
after bpf program finished running.
bpf_test_run() is also fixed to use the new bpf_cgroup_storage_set()
interface.
The patch is tested on top of [2] with reproducer in [1].
Without this patch, the kernel will emit errors within 2-3 minutes.
With this patch, after one hour, still no error.
 [1] https://lore.kernel.org/bpf/CAKH8qBuXCfUz=w8L+Fj74OaUpbosO29niYwTki7e3Ag044_aww@mail.gmail.com/T
 [2] https://lore.kernel.org/bpf/20210309185028.3763817-1-yhs@fb.com
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Roman Gushchin <guro@fb.com>
Link: https://lore.kernel.org/bpf/20210323055146.3334476-1-yhs@fb.com
			
			
This commit is contained in:
		
							parent
							
								
									a46410d5e4
								
							
						
					
					
						commit
						b910eaaaa4
					
				
					 5 changed files with 86 additions and 19 deletions
				
			
		| 
						 | 
					@ -20,14 +20,25 @@ struct bpf_sock_ops_kern;
 | 
				
			||||||
struct bpf_cgroup_storage;
 | 
					struct bpf_cgroup_storage;
 | 
				
			||||||
struct ctl_table;
 | 
					struct ctl_table;
 | 
				
			||||||
struct ctl_table_header;
 | 
					struct ctl_table_header;
 | 
				
			||||||
 | 
					struct task_struct;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_CGROUP_BPF
 | 
					#ifdef CONFIG_CGROUP_BPF
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
 | 
					extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
 | 
				
			||||||
#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
 | 
					#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
DECLARE_PER_CPU(struct bpf_cgroup_storage*,
 | 
					#define BPF_CGROUP_STORAGE_NEST_MAX	8
 | 
				
			||||||
		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
 | 
					
 | 
				
			||||||
 | 
					struct bpf_cgroup_storage_info {
 | 
				
			||||||
 | 
						struct task_struct *task;
 | 
				
			||||||
 | 
						struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
 | 
				
			||||||
 | 
					 * to use bpf cgroup storage simultaneously.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
 | 
				
			||||||
 | 
							bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define for_each_cgroup_storage_type(stype) \
 | 
					#define for_each_cgroup_storage_type(stype) \
 | 
				
			||||||
	for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
 | 
						for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
 | 
				
			||||||
| 
						 | 
					@ -161,13 +172,42 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type(
 | 
				
			||||||
	return BPF_CGROUP_STORAGE_SHARED;
 | 
						return BPF_CGROUP_STORAGE_SHARED;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
 | 
					static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
 | 
				
			||||||
					 *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
 | 
										 *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	enum bpf_cgroup_storage_type stype;
 | 
						enum bpf_cgroup_storage_type stype;
 | 
				
			||||||
 | 
						int i, err = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						preempt_disable();
 | 
				
			||||||
 | 
						for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
 | 
				
			||||||
 | 
							if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
 | 
				
			||||||
 | 
								continue;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							this_cpu_write(bpf_cgroup_storage_info[i].task, current);
 | 
				
			||||||
		for_each_cgroup_storage_type(stype)
 | 
							for_each_cgroup_storage_type(stype)
 | 
				
			||||||
		this_cpu_write(bpf_cgroup_storage[stype], storage[stype]);
 | 
								this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
 | 
				
			||||||
 | 
									       storage[stype]);
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						err = -EBUSY;
 | 
				
			||||||
 | 
						WARN_ON_ONCE(1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
 | 
						preempt_enable();
 | 
				
			||||||
 | 
						return err;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void bpf_cgroup_storage_unset(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
 | 
				
			||||||
 | 
							if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
 | 
				
			||||||
 | 
								continue;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct bpf_cgroup_storage *
 | 
					struct bpf_cgroup_storage *
 | 
				
			||||||
| 
						 | 
					@ -448,8 +488,9 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
 | 
				
			||||||
	return -EINVAL;
 | 
						return -EINVAL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void bpf_cgroup_storage_set(
 | 
					static inline int bpf_cgroup_storage_set(
 | 
				
			||||||
	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
 | 
						struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
 | 
				
			||||||
 | 
					static inline void bpf_cgroup_storage_unset(void) {}
 | 
				
			||||||
static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
 | 
					static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
 | 
				
			||||||
					    struct bpf_map *map) { return 0; }
 | 
										    struct bpf_map *map) { return 0; }
 | 
				
			||||||
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
 | 
					static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1106,6 +1106,13 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
 | 
				
			||||||
/* BPF program asks to set CN on the packet. */
 | 
					/* BPF program asks to set CN on the packet. */
 | 
				
			||||||
#define BPF_RET_SET_CN						(1 << 0)
 | 
					#define BPF_RET_SET_CN						(1 << 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY,
 | 
				
			||||||
 | 
					 * if bpf_cgroup_storage_set() failed, the rest of programs
 | 
				
			||||||
 | 
					 * will not execute. This should be a really rare scenario
 | 
				
			||||||
 | 
					 * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of
 | 
				
			||||||
 | 
					 * preemptions all between bpf_cgroup_storage_set() and
 | 
				
			||||||
 | 
					 * bpf_cgroup_storage_unset() on the same cpu.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)		\
 | 
					#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)		\
 | 
				
			||||||
	({								\
 | 
						({								\
 | 
				
			||||||
		struct bpf_prog_array_item *_item;			\
 | 
							struct bpf_prog_array_item *_item;			\
 | 
				
			||||||
| 
						 | 
					@ -1118,10 +1125,12 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
 | 
				
			||||||
		_array = rcu_dereference(array);			\
 | 
							_array = rcu_dereference(array);			\
 | 
				
			||||||
		_item = &_array->items[0];				\
 | 
							_item = &_array->items[0];				\
 | 
				
			||||||
		while ((_prog = READ_ONCE(_item->prog))) {		\
 | 
							while ((_prog = READ_ONCE(_item->prog))) {		\
 | 
				
			||||||
			bpf_cgroup_storage_set(_item->cgroup_storage);	\
 | 
								if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))	\
 | 
				
			||||||
 | 
									break;					\
 | 
				
			||||||
			func_ret = func(_prog, ctx);			\
 | 
								func_ret = func(_prog, ctx);			\
 | 
				
			||||||
			_ret &= (func_ret & 1);				\
 | 
								_ret &= (func_ret & 1);				\
 | 
				
			||||||
			*(ret_flags) |= (func_ret >> 1);			\
 | 
								*(ret_flags) |= (func_ret >> 1);			\
 | 
				
			||||||
 | 
								bpf_cgroup_storage_unset();			\
 | 
				
			||||||
			_item++;					\
 | 
								_item++;					\
 | 
				
			||||||
		}							\
 | 
							}							\
 | 
				
			||||||
		rcu_read_unlock();					\
 | 
							rcu_read_unlock();					\
 | 
				
			||||||
| 
						 | 
					@ -1142,9 +1151,14 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
 | 
				
			||||||
			goto _out;			\
 | 
								goto _out;			\
 | 
				
			||||||
		_item = &_array->items[0];		\
 | 
							_item = &_array->items[0];		\
 | 
				
			||||||
		while ((_prog = READ_ONCE(_item->prog))) {		\
 | 
							while ((_prog = READ_ONCE(_item->prog))) {		\
 | 
				
			||||||
			if (set_cg_storage)		\
 | 
								if (!set_cg_storage) {			\
 | 
				
			||||||
				bpf_cgroup_storage_set(_item->cgroup_storage);	\
 | 
					 | 
				
			||||||
				_ret &= func(_prog, ctx);	\
 | 
									_ret &= func(_prog, ctx);	\
 | 
				
			||||||
 | 
								} else {				\
 | 
				
			||||||
 | 
									if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))	\
 | 
				
			||||||
 | 
										break;			\
 | 
				
			||||||
 | 
									_ret &= func(_prog, ctx);	\
 | 
				
			||||||
 | 
									bpf_cgroup_storage_unset();	\
 | 
				
			||||||
 | 
								}				\
 | 
				
			||||||
			_item++;			\
 | 
								_item++;			\
 | 
				
			||||||
		}					\
 | 
							}					\
 | 
				
			||||||
_out:							\
 | 
					_out:							\
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -382,8 +382,8 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_CGROUP_BPF
 | 
					#ifdef CONFIG_CGROUP_BPF
 | 
				
			||||||
DECLARE_PER_CPU(struct bpf_cgroup_storage*,
 | 
					DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
 | 
				
			||||||
		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
 | 
							bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 | 
					BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -392,10 +392,17 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 | 
				
			||||||
	 * verifier checks that its value is correct.
 | 
						 * verifier checks that its value is correct.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
 | 
						enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
 | 
				
			||||||
	struct bpf_cgroup_storage *storage;
 | 
						struct bpf_cgroup_storage *storage = NULL;
 | 
				
			||||||
	void *ptr;
 | 
						void *ptr;
 | 
				
			||||||
 | 
						int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	storage = this_cpu_read(bpf_cgroup_storage[stype]);
 | 
						for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
 | 
				
			||||||
 | 
							if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
 | 
				
			||||||
 | 
								continue;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]);
 | 
				
			||||||
 | 
							break;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (stype == BPF_CGROUP_STORAGE_SHARED)
 | 
						if (stype == BPF_CGROUP_STORAGE_SHARED)
 | 
				
			||||||
		ptr = &READ_ONCE(storage->buf)->data[0];
 | 
							ptr = &READ_ONCE(storage->buf)->data[0];
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -9,10 +9,11 @@
 | 
				
			||||||
#include <linux/slab.h>
 | 
					#include <linux/slab.h>
 | 
				
			||||||
#include <uapi/linux/btf.h>
 | 
					#include <uapi/linux/btf.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef CONFIG_CGROUP_BPF
 | 
					#ifdef CONFIG_CGROUP_BPF
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					DEFINE_PER_CPU(struct bpf_cgroup_storage_info,
 | 
				
			||||||
 | 
						       bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "../cgroup/cgroup-internal.h"
 | 
					#include "../cgroup/cgroup-internal.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define LOCAL_STORAGE_CREATE_FLAG_MASK					\
 | 
					#define LOCAL_STORAGE_CREATE_FLAG_MASK					\
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -106,12 +106,16 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	bpf_test_timer_enter(&t);
 | 
						bpf_test_timer_enter(&t);
 | 
				
			||||||
	do {
 | 
						do {
 | 
				
			||||||
		bpf_cgroup_storage_set(storage);
 | 
							ret = bpf_cgroup_storage_set(storage);
 | 
				
			||||||
 | 
							if (ret)
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (xdp)
 | 
							if (xdp)
 | 
				
			||||||
			*retval = bpf_prog_run_xdp(prog, ctx);
 | 
								*retval = bpf_prog_run_xdp(prog, ctx);
 | 
				
			||||||
		else
 | 
							else
 | 
				
			||||||
			*retval = BPF_PROG_RUN(prog, ctx);
 | 
								*retval = BPF_PROG_RUN(prog, ctx);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							bpf_cgroup_storage_unset();
 | 
				
			||||||
	} while (bpf_test_timer_continue(&t, repeat, &ret, time));
 | 
						} while (bpf_test_timer_continue(&t, repeat, &ret, time));
 | 
				
			||||||
	bpf_test_timer_leave(&t);
 | 
						bpf_test_timer_leave(&t);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue