	bpf: Enable non-atomic allocations in local storage
Currently, local storage memory can only be allocated atomically
(GFP_ATOMIC). This restriction is too strict for sleepable bpf
programs.

In this patch, the verifier detects whether the program is sleepable,
and passes the corresponding GFP_KERNEL or GFP_ATOMIC flag as a
5th argument to bpf_task/sk/inode_storage_get. This flag will propagate
down to the local storage functions that allocate memory.

Please note that bpf_task/sk/inode_storage_update_elem functions are
invoked by userspace applications through syscalls. Preemption is
disabled before bpf_task/sk/inode_storage_update_elem is called, which
means they will always have to allocate memory atomically.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: KP Singh <kpsingh@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220318045553.3091807-2-joannekoong@fb.com
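For illustration only (not part of this patch): a minimal sketch of a sleepable LSM program whose bpf_task_storage_get() call the verifier would now patch to pass GFP_KERNEL as the hidden fifth argument. The map name, hook choice, and counter logic are assumptions made up for the example.

/* Hypothetical example: a sleepable ("lsm.s") program using task local
 * storage. With this change, the verifier rewrites the
 * bpf_task_storage_get() call below to pass GFP_KERNEL as the hidden
 * 5th argument, so the storage element can be allocated non-atomically.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, __u64);
} open_count SEC(".maps");

SEC("lsm.s/file_open")	/* the ".s" suffix marks the program as sleepable */
int BPF_PROG(count_file_open, struct file *file)
{
	__u64 *val;

	/* In a sleepable program this lookup-or-create may now allocate
	 * with GFP_KERNEL; pass 0 as the value so a new element is
	 * zero-initialized.
	 */
	val = bpf_task_storage_get(&open_count, bpf_get_current_task_btf(),
				   0, BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (val)
		*val += 1;
	return 0;
}

char LICENSE[] SEC("license") = "GPL";

A non-sleepable variant of the same program (SEC("lsm/file_open")) would instead be patched to pass GFP_ATOMIC, matching the pre-patch behaviour.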
parent a8fee96202
commit b00fa38a9c

6 changed files with 84 additions and 41 deletions
include/linux/bpf_local_storage.h
@@ -154,16 +154,17 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);
 
 struct bpf_local_storage_elem *
 bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
-		bool charge_mem);
+		bool charge_mem, gfp_t gfp_flags);
 
 int
 bpf_local_storage_alloc(void *owner,
 			struct bpf_local_storage_map *smap,
-			struct bpf_local_storage_elem *first_selem);
+			struct bpf_local_storage_elem *first_selem,
+			gfp_t gfp_flags);
 
 struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
-			 void *value, u64 map_flags);
+			 void *value, u64 map_flags, gfp_t gfp_flags);
 
 void bpf_local_storage_free_rcu(struct rcu_head *rcu);
 

kernel/bpf/bpf_inode_storage.c
@@ -136,7 +136,7 @@ static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
 
 	sdata = bpf_local_storage_update(f->f_inode,
 					 (struct bpf_local_storage_map *)map,
-					 value, map_flags);
+					 value, map_flags, GFP_ATOMIC);
 	fput(f);
 	return PTR_ERR_OR_ZERO(sdata);
 }
@@ -169,8 +169,9 @@ static int bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
 	return err;
 }
 
-BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
-	   void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
+	   void *, value, u64, flags, gfp_t, gfp_flags)
 {
 	struct bpf_local_storage_data *sdata;
 
@@ -196,7 +197,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
 	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
 		sdata = bpf_local_storage_update(
 			inode, (struct bpf_local_storage_map *)map, value,
-			BPF_NOEXIST);
+			BPF_NOEXIST, gfp_flags);
 		return IS_ERR(sdata) ? (unsigned long)NULL :
 					     (unsigned long)sdata->data;
 	}

kernel/bpf/bpf_local_storage.c
@@ -63,7 +63,7 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
 
 struct bpf_local_storage_elem *
 bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
-		void *value, bool charge_mem)
+		void *value, bool charge_mem, gfp_t gfp_flags)
 {
 	struct bpf_local_storage_elem *selem;
 
@@ -71,7 +71,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
 		return NULL;
 
 	selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
-				GFP_ATOMIC | __GFP_NOWARN);
+				gfp_flags | __GFP_NOWARN);
 	if (selem) {
 		if (value)
 			memcpy(SDATA(selem)->data, value, smap->map.value_size);
@@ -282,7 +282,8 @@ static int check_flags(const struct bpf_local_storage_data *old_sdata,
 
 int bpf_local_storage_alloc(void *owner,
 			    struct bpf_local_storage_map *smap,
-			    struct bpf_local_storage_elem *first_selem)
+			    struct bpf_local_storage_elem *first_selem,
+			    gfp_t gfp_flags)
 {
 	struct bpf_local_storage *prev_storage, *storage;
 	struct bpf_local_storage **owner_storage_ptr;
@@ -293,7 +294,7 @@ int bpf_local_storage_alloc(void *owner,
 		return err;
 
 	storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
-				  GFP_ATOMIC | __GFP_NOWARN);
+				  gfp_flags | __GFP_NOWARN);
 	if (!storage) {
 		err = -ENOMEM;
 		goto uncharge;
@@ -350,10 +351,10 @@ int bpf_local_storage_alloc(void *owner,
  */
 struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
-			 void *value, u64 map_flags)
+			 void *value, u64 map_flags, gfp_t gfp_flags)
 {
 	struct bpf_local_storage_data *old_sdata = NULL;
-	struct bpf_local_storage_elem *selem;
+	struct bpf_local_storage_elem *selem = NULL;
 	struct bpf_local_storage *local_storage;
 	unsigned long flags;
 	int err;
@@ -365,6 +366,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		     !map_value_has_spin_lock(&smap->map)))
 		return ERR_PTR(-EINVAL);
 
+	if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
+		return ERR_PTR(-EINVAL);
+
 	local_storage = rcu_dereference_check(*owner_storage(smap, owner),
 					      bpf_rcu_lock_held());
 	if (!local_storage || hlist_empty(&local_storage->list)) {
@@ -373,11 +377,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		if (err)
 			return ERR_PTR(err);
 
-		selem = bpf_selem_alloc(smap, owner, value, true);
+		selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
 		if (!selem)
 			return ERR_PTR(-ENOMEM);
 
-		err = bpf_local_storage_alloc(owner, smap, selem);
+		err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags);
 		if (err) {
 			kfree(selem);
 			mem_uncharge(smap, owner, smap->elem_size);
@@ -404,6 +408,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		}
 	}
 
+	if (gfp_flags == GFP_KERNEL) {
+		selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
+		if (!selem)
+			return ERR_PTR(-ENOMEM);
+	}
+
 	raw_spin_lock_irqsave(&local_storage->lock, flags);
 
 	/* Recheck local_storage->list under local_storage->lock */
@@ -429,19 +439,21 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		goto unlock;
 	}
 
-	/* local_storage->lock is held.  Hence, we are sure
-	 * we can unlink and uncharge the old_sdata successfully
-	 * later.  Hence, instead of charging the new selem now
-	 * and then uncharge the old selem later (which may cause
-	 * a potential but unnecessary charge failure),  avoid taking
-	 * a charge at all here (the "!old_sdata" check) and the
-	 * old_sdata will not be uncharged later during
-	 * bpf_selem_unlink_storage_nolock().
-	 */
-	selem = bpf_selem_alloc(smap, owner, value, !old_sdata);
-	if (!selem) {
-		err = -ENOMEM;
-		goto unlock_err;
+	if (gfp_flags != GFP_KERNEL) {
+		/* local_storage->lock is held.  Hence, we are sure
+		 * we can unlink and uncharge the old_sdata successfully
+		 * later.  Hence, instead of charging the new selem now
+		 * and then uncharge the old selem later (which may cause
+		 * a potential but unnecessary charge failure),  avoid taking
+		 * a charge at all here (the "!old_sdata" check) and the
+		 * old_sdata will not be uncharged later during
+		 * bpf_selem_unlink_storage_nolock().
+		 */
+		selem = bpf_selem_alloc(smap, owner, value, !old_sdata, gfp_flags);
+		if (!selem) {
+			err = -ENOMEM;
+			goto unlock_err;
+		}
 	}
 
 	/* First, link the new selem to the map */
@@ -463,6 +475,10 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 
 unlock_err:
 	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+	if (selem) {
+		mem_uncharge(smap, owner, smap->elem_size);
+		kfree(selem);
+	}
 	return ERR_PTR(err);
 }
 

kernel/bpf/bpf_task_storage.c
@@ -174,7 +174,8 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
 
 	bpf_task_storage_lock();
 	sdata = bpf_local_storage_update(
-		task, (struct bpf_local_storage_map *)map, value, map_flags);
+		task, (struct bpf_local_storage_map *)map, value, map_flags,
+		GFP_ATOMIC);
 	bpf_task_storage_unlock();
 
 	err = PTR_ERR_OR_ZERO(sdata);
@@ -226,8 +227,9 @@ static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
 	return err;
 }
 
-BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
-	   task, void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
+	   task, void *, value, u64, flags, gfp_t, gfp_flags)
 {
 	struct bpf_local_storage_data *sdata;
 
@@ -250,7 +252,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
 	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
 		sdata = bpf_local_storage_update(
 			task, (struct bpf_local_storage_map *)map, value,
-			BPF_NOEXIST);
+			BPF_NOEXIST, gfp_flags);
 
 unlock:
 	bpf_task_storage_unlock();

kernel/bpf/verifier.c
@@ -13492,6 +13492,26 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			goto patch_call_imm;
 		}
 
+		if (insn->imm == BPF_FUNC_task_storage_get ||
+		    insn->imm == BPF_FUNC_sk_storage_get ||
+		    insn->imm == BPF_FUNC_inode_storage_get) {
+			if (env->prog->aux->sleepable)
+				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__s32)GFP_KERNEL);
+			else
+				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__s32)GFP_ATOMIC);
+			insn_buf[1] = *insn;
+			cnt = 2;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta += cnt - 1;
+			env->prog = prog = new_prog;
+			insn = new_prog->insnsi + i + delta;
+			goto patch_call_imm;
+		}
+
 		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
 		 * and other inlining handlers are currently limited to 64 bit
 		 * only.

net/core/bpf_sk_storage.c
@@ -141,7 +141,7 @@ static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
 	if (sock) {
 		sdata = bpf_local_storage_update(
 			sock->sk, (struct bpf_local_storage_map *)map, value,
-			map_flags);
+			map_flags, GFP_ATOMIC);
 		sockfd_put(sock);
 		return PTR_ERR_OR_ZERO(sdata);
 	}
@@ -172,7 +172,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
 {
 	struct bpf_local_storage_elem *copy_selem;
 
-	copy_selem = bpf_selem_alloc(smap, newsk, NULL, true);
+	copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC);
 	if (!copy_selem)
 		return NULL;
 
@@ -230,7 +230,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
 			bpf_selem_link_map(smap, copy_selem);
 			bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
 		} else {
-			ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
+			ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
 			if (ret) {
 				kfree(copy_selem);
 				atomic_sub(smap->elem_size,
@@ -255,8 +255,9 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
 	return ret;
 }
 
-BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
-	   void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
+	   void *, value, u64, flags, gfp_t, gfp_flags)
 {
 	struct bpf_local_storage_data *sdata;
 
@@ -277,7 +278,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
 	    refcount_inc_not_zero(&sk->sk_refcnt)) {
 		sdata = bpf_local_storage_update(
 			sk, (struct bpf_local_storage_map *)map, value,
-			BPF_NOEXIST);
+			BPF_NOEXIST, gfp_flags);
 		/* sk must be a fullsock (guaranteed by verifier),
 		 * so sock_gen_put() is unnecessary.
 		 */
@@ -417,14 +418,16 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
 	return false;
 }
 
-BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
-	   void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
+	   void *, value, u64, flags, gfp_t, gfp_flags)
 {
 	WARN_ON_ONCE(!bpf_rcu_lock_held());
 	if (in_hardirq() || in_nmi())
 		return (unsigned long)NULL;
 
-	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags);
+	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
+						     gfp_flags);
 }
 
 BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,