bpf: Enable task local storage for tracing programs
To access per-task data, BPF programs usually create a hash table with
the pid as the key. This is not ideal because:
 1. The user needs to estimate the proper size of the hash table, which may
    be inaccurate;
 2. Big hash tables are slow;
 3. To clean up the data properly during task termination, the user needs
    to write extra logic.
Task local storage overcomes these issues and offers a better option for
such per-task data. Task local storage is currently only available to
BPF_LSM programs. Now enable it for tracing programs.
Unlike LSM programs, tracing programs can be called in IRQ contexts.
Helpers that access task local storage are updated to use
raw_spin_lock_irqsave() instead of raw_spin_lock_bh().
Tracing programs can attach to functions on the task free path, e.g.
exit_creds(). To avoid allocating task local storage after
bpf_task_storage_free(), bpf_task_storage_get() is updated to not allocate
new storage when the task is not refcounted (task->usage == 0).
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: KP Singh <kpsingh@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210225234319.336131-2-songliubraving@fb.com
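
For illustration only (not part of the original patch): with this change, a tracing program can keep per-task state in a BPF_MAP_TYPE_TASK_STORAGE map instead of a pid-keyed hash map. Below is a minimal libbpf-style sketch; the map name (task_cnt), program name, value type, and the tp_btf/sys_enter attach point are made up for the example and assume a kernel that already carries this patch.

// SPDX-License-Identifier: GPL-2.0
/* Illustrative sketch: per-task syscall counter kept in task local storage.
 * The identifiers here are hypothetical; the map type and helpers are the
 * ones this patch makes available to tracing programs.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, __u64);
} task_cnt SEC(".maps");

SEC("tp_btf/sys_enter")
int BPF_PROG(count_sys_enter, struct pt_regs *regs, long id)
{
	struct task_struct *task = bpf_get_current_task_btf();
	__u64 *cnt;

	/* Look up this task's storage slot, creating it on first use. */
	cnt = bpf_task_storage_get(&task_cnt, task, 0,
				   BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (cnt)
		__sync_fetch_and_add(cnt, 1);
	return 0;
}

char _license[] SEC("license") = "GPL";

No hash-table sizing or manual cleanup is needed: the storage is released by bpf_task_storage_free() when the task itself is freed (see the kernel/fork.c hunk below).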
			
			
parent 9c8f21e6f8
commit a10787e6d5

10 changed files with 51 additions and 72 deletions
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1499,6 +1499,7 @@ struct bpf_prog *bpf_prog_by_id(u32 id);
 struct bpf_link *bpf_link_by_id(u32 id);
 
 const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
+void bpf_task_storage_free(struct task_struct *task);
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -1684,6 +1685,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 {
 	return NULL;
 }
+
+static inline void bpf_task_storage_free(struct task_struct *task)
+{
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
@@ -1886,6 +1891,8 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
 extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
 extern const struct bpf_func_proto bpf_sock_from_file_proto;
 extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
+extern const struct bpf_func_proto bpf_task_storage_get_proto;
+extern const struct bpf_func_proto bpf_task_storage_delete_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -38,21 +38,9 @@ static inline struct bpf_storage_blob *bpf_inode(
 	return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
 }
 
-static inline struct bpf_storage_blob *bpf_task(
-	const struct task_struct *task)
-{
-	if (unlikely(!task->security))
-		return NULL;
-
-	return task->security + bpf_lsm_blob_sizes.lbs_task;
-}
-
 extern const struct bpf_func_proto bpf_inode_storage_get_proto;
 extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
-extern const struct bpf_func_proto bpf_task_storage_get_proto;
-extern const struct bpf_func_proto bpf_task_storage_delete_proto;
 void bpf_inode_storage_free(struct inode *inode);
-void bpf_task_storage_free(struct task_struct *task);
 
 #else /* !CONFIG_BPF_LSM */
 
@@ -73,20 +61,10 @@ static inline struct bpf_storage_blob *bpf_inode(
 	return NULL;
 }
 
-static inline struct bpf_storage_blob *bpf_task(
-	const struct task_struct *task)
-{
-	return NULL;
-}
-
 static inline void bpf_inode_storage_free(struct inode *inode)
 {
 }
 
-static inline void bpf_task_storage_free(struct task_struct *task)
-{
-}
-
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -109,8 +109,8 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
 #endif
 #ifdef CONFIG_BPF_LSM
 BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
 #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 #if defined(CONFIG_XDP_SOCKETS)
 BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -42,6 +42,7 @@ struct audit_context;
 struct backing_dev_info;
 struct bio_list;
 struct blk_plug;
+struct bpf_local_storage;
 struct capture_control;
 struct cfs_rq;
 struct fs_struct;
@@ -1348,6 +1349,10 @@ struct task_struct {
 	/* Used by LSM modules for access restriction: */
 	void				*security;
 #endif
+#ifdef CONFIG_BPF_SYSCALL
+	/* Used by BPF task local storage */
+	struct bpf_local_storage __rcu	*bpf_storage;
+#endif
 
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
 	unsigned long			lowest_stack;
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -9,8 +9,8 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
+obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
 obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
-obj-${CONFIG_BPF_LSM}	  += bpf_task_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
@@ -18,7 +18,6 @@ obj-$(CONFIG_BPF_JIT) += dispatcher.o
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_BPF_SYSCALL) += devmap.o
 obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
-obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += offload.o
 obj-$(CONFIG_BPF_SYSCALL) += net_namespace.o
 endif
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -140,17 +140,18 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
 {
 	struct bpf_local_storage *local_storage;
 	bool free_local_storage = false;
+	unsigned long flags;
 
 	if (unlikely(!selem_linked_to_storage(selem)))
 		/* selem has already been unlinked from sk */
 		return;
 
 	local_storage = rcu_dereference(selem->local_storage);
-	raw_spin_lock_bh(&local_storage->lock);
+	raw_spin_lock_irqsave(&local_storage->lock, flags);
 	if (likely(selem_linked_to_storage(selem)))
 		free_local_storage = bpf_selem_unlink_storage_nolock(
 			local_storage, selem, true);
-	raw_spin_unlock_bh(&local_storage->lock);
+	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 
 	if (free_local_storage)
 		kfree_rcu(local_storage, rcu);
@@ -167,6 +168,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
 {
 	struct bpf_local_storage_map *smap;
 	struct bpf_local_storage_map_bucket *b;
+	unsigned long flags;
 
 	if (unlikely(!selem_linked_to_map(selem)))
 		/* selem has already be unlinked from smap */
@@ -174,21 +176,22 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
 
 	smap = rcu_dereference(SDATA(selem)->smap);
 	b = select_bucket(smap, selem);
-	raw_spin_lock_bh(&b->lock);
+	raw_spin_lock_irqsave(&b->lock, flags);
 	if (likely(selem_linked_to_map(selem)))
 		hlist_del_init_rcu(&selem->map_node);
-	raw_spin_unlock_bh(&b->lock);
+	raw_spin_unlock_irqrestore(&b->lock, flags);
 }
 
 void bpf_selem_link_map(struct bpf_local_storage_map *smap,
 			struct bpf_local_storage_elem *selem)
 {
 	struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
+	unsigned long flags;
 
-	raw_spin_lock_bh(&b->lock);
+	raw_spin_lock_irqsave(&b->lock, flags);
 	RCU_INIT_POINTER(SDATA(selem)->smap, smap);
 	hlist_add_head_rcu(&selem->map_node, &b->list);
-	raw_spin_unlock_bh(&b->lock);
+	raw_spin_unlock_irqrestore(&b->lock, flags);
 }
 
 void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
@@ -224,16 +227,18 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
 
 	sdata = SDATA(selem);
 	if (cacheit_lockit) {
+		unsigned long flags;
+
 		/* spinlock is needed to avoid racing with the
 		 * parallel delete.  Otherwise, publishing an already
 		 * deleted sdata to the cache will become a use-after-free
 		 * problem in the next bpf_local_storage_lookup().
 		 */
-		raw_spin_lock_bh(&local_storage->lock);
+		raw_spin_lock_irqsave(&local_storage->lock, flags);
 		if (selem_linked_to_storage(selem))
 			rcu_assign_pointer(local_storage->cache[smap->cache_idx],
 					   sdata);
-		raw_spin_unlock_bh(&local_storage->lock);
+		raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 	}
 
 	return sdata;
@@ -327,6 +332,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	struct bpf_local_storage_data *old_sdata = NULL;
 	struct bpf_local_storage_elem *selem;
 	struct bpf_local_storage *local_storage;
+	unsigned long flags;
 	int err;
 
 	/* BPF_EXIST and BPF_NOEXIST cannot be both set */
@@ -374,7 +380,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		}
 	}
 
-	raw_spin_lock_bh(&local_storage->lock);
+	raw_spin_lock_irqsave(&local_storage->lock, flags);
 
 	/* Recheck local_storage->list under local_storage->lock */
 	if (unlikely(hlist_empty(&local_storage->list))) {
@@ -428,11 +434,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	}
 
 unlock:
-	raw_spin_unlock_bh(&local_storage->lock);
+	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 	return SDATA(selem);
 
 unlock_err:
-	raw_spin_unlock_bh(&local_storage->lock);
+	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 	return ERR_PTR(err);
 }
 
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -115,10 +115,6 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_spin_lock_proto;
 	case BPF_FUNC_spin_unlock:
 		return &bpf_spin_unlock_proto;
-	case BPF_FUNC_task_storage_get:
-		return &bpf_task_storage_get_proto;
-	case BPF_FUNC_task_storage_delete:
-		return &bpf_task_storage_delete_proto;
 	case BPF_FUNC_bprm_opts_set:
 		return &bpf_bprm_opts_set_proto;
 	case BPF_FUNC_ima_inode_hash:
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -15,7 +15,6 @@
 #include <linux/bpf_local_storage.h>
 #include <linux/filter.h>
 #include <uapi/linux/btf.h>
-#include <linux/bpf_lsm.h>
 #include <linux/btf_ids.h>
 #include <linux/fdtable.h>
 
@@ -24,12 +23,8 @@ DEFINE_BPF_STORAGE_CACHE(task_cache);
 static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
 {
 	struct task_struct *task = owner;
-	struct bpf_storage_blob *bsb;
 
-	bsb = bpf_task(task);
-	if (!bsb)
-		return NULL;
-	return &bsb->storage;
+	return &task->bpf_storage;
 }
 
 static struct bpf_local_storage_data *
@@ -38,13 +33,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
 {
 	struct bpf_local_storage *task_storage;
 	struct bpf_local_storage_map *smap;
-	struct bpf_storage_blob *bsb;
-
-	bsb = bpf_task(task);
-	if (!bsb)
-		return NULL;
 
-	task_storage = rcu_dereference(bsb->storage);
+	task_storage = rcu_dereference(task->bpf_storage);
 	if (!task_storage)
 		return NULL;
 
@@ -57,16 +47,12 @@ void bpf_task_storage_free(struct task_struct *task)
 	struct bpf_local_storage_elem *selem;
 	struct bpf_local_storage *local_storage;
 	bool free_task_storage = false;
-	struct bpf_storage_blob *bsb;
 	struct hlist_node *n;
-
-	bsb = bpf_task(task);
-	if (!bsb)
-		return;
+	unsigned long flags;
 
 	rcu_read_lock();
 
-	local_storage = rcu_dereference(bsb->storage);
+	local_storage = rcu_dereference(task->bpf_storage);
 	if (!local_storage) {
 		rcu_read_unlock();
 		return;
@@ -81,7 +67,7 @@ void bpf_task_storage_free(struct task_struct *task)
 	 * when unlinking elem from the local_storage->list and
 	 * the map's bucket->list.
 	 */
-	raw_spin_lock_bh(&local_storage->lock);
+	raw_spin_lock_irqsave(&local_storage->lock, flags);
 	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
 		/* Always unlink from map before unlinking from
 		 * local_storage.
@@ -90,7 +76,7 @@ void bpf_task_storage_free(struct task_struct *task)
 		free_task_storage = bpf_selem_unlink_storage_nolock(
 			local_storage, selem, false);
 	}
-	raw_spin_unlock_bh(&local_storage->lock);
+	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 	rcu_read_unlock();
 
 	/* free_task_storage should always be true as long as
@@ -150,7 +136,7 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
 	 */
 	WARN_ON_ONCE(!rcu_read_lock_held());
 	task = pid_task(pid, PIDTYPE_PID);
-	if (!task || !task_storage_ptr(task)) {
+	if (!task) {
 		err = -ENOENT;
 		goto out;
 	}
@@ -213,23 +199,16 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
 	if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
 		return (unsigned long)NULL;
 
-	/* explicitly check that the task_storage_ptr is not
-	 * NULL as task_storage_lookup returns NULL in this case and
-	 * bpf_local_storage_update expects the owner to have a
-	 * valid storage pointer.
-	 */
-	if (!task || !task_storage_ptr(task))
+	if (!task)
 		return (unsigned long)NULL;
 
 	sdata = task_storage_lookup(task, map, true);
 	if (sdata)
 		return (unsigned long)sdata->data;
 
-	/* This helper must only be called from places where the lifetime of the task
-	 * is guaranteed. Either by being refcounted or by being protected
-	 * by an RCU read-side critical section.
-	 */
-	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+	/* only allocate new storage, when the task is refcounted */
+	if (refcount_read(&task->usage) &&
+	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) {
 		sdata = bpf_local_storage_update(
 			task, (struct bpf_local_storage_map *)map, value,
 			BPF_NOEXIST);
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -96,6 +96,7 @@
 #include <linux/kasan.h>
 #include <linux/scs.h>
 #include <linux/io_uring.h>
+#include <linux/bpf.h>
 
 #include <asm/pgalloc.h>
 #include <linux/uaccess.h>
@@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk)
 	cgroup_free(tsk);
 	task_numa_free(tsk, true);
 	security_task_free(tsk);
+	bpf_task_storage_free(tsk);
 	exit_creds(tsk);
 	delayacct_tsk_free(tsk);
 	put_signal_struct(tsk->signal);
@@ -2062,6 +2064,9 @@ static __latent_entropy struct task_struct *copy_process(
 	p->sequential_io	= 0;
 	p->sequential_io_avg	= 0;
 #endif
+#ifdef CONFIG_BPF_SYSCALL
+	RCU_INIT_POINTER(p->bpf_storage, NULL);
+#endif
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	retval = sched_fork(clone_flags, p);
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1367,6 +1367,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_per_cpu_ptr_proto;
 	case BPF_FUNC_this_cpu_ptr:
 		return &bpf_this_cpu_ptr_proto;
+	case BPF_FUNC_task_storage_get:
+		return &bpf_task_storage_get_proto;
+	case BPF_FUNC_task_storage_delete:
+		return &bpf_task_storage_delete_proto;
 	default:
 		return NULL;
 	}