	bpf: Parameterize task iterators.
Allow creating an iterator that loops through the resources of one thread/process.

Previously, people could only create iterators that loop through all the file, vma, and task resources in the system, even when they were interested only in the resources of a specific task or process. By passing the additional parameters, people can now create an iterator that goes through all resources or only the resources of one task.

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/bpf/20220926184957.208194-2-kuifeng@fb.com
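For context, here is a minimal userspace sketch (not part of this commit) of how the new parameters can be passed through libbpf. bpf_iter_attach_opts and bpf_program__attach_iter() are existing libbpf APIs; the function name below and the omission of error handling are illustrative only:

#include <string.h>
#include <bpf/libbpf.h>

/* Attach an iter/task (or task_file/task_vma) program so that it only
 * visits one thread.  Sketch only; "prog" must already be loaded.
 */
static struct bpf_link *attach_single_task_iter(struct bpf_program *prog, __u32 tid)
{
	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
	union bpf_iter_link_info linfo;

	memset(&linfo, 0, sizeof(linfo));
	/* At most one of tid, pid and pid_fd may be non-zero; tid maps to
	 * BPF_TASK_ITER_TID in bpf_iter_attach_task() below.
	 */
	linfo.task.tid = tid;
	opts.link_info = &linfo;
	opts.link_info_len = sizeof(linfo);

	return bpf_program__attach_iter(prog, &opts);
}

Setting linfo.task.pid (a tgid) or linfo.task.pid_fd (a pidfd) instead selects BPF_TASK_ITER_TGID, i.e. every thread of one process, while leaving all three fields zero keeps the old behavior of walking every task in the system.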
parent 87dbdc230d
commit f0d74c4da1

4 changed files with 203 additions and 22 deletions
include/linux/bpf.h

@@ -1796,6 +1796,27 @@ int bpf_obj_get_user(const char __user *pathname, int flags);
 	extern int bpf_iter_ ## target(args);			\
 	int __init bpf_iter_ ## target(args) { return 0; }
 
+/*
+ * The task type of iterators.
+ *
+ * For BPF task iterators, they can be parameterized with various
+ * parameters to visit only some of tasks.
+ *
+ * BPF_TASK_ITER_ALL (default)
+ *	Iterate over resources of every task.
+ *
+ * BPF_TASK_ITER_TID
+ *	Iterate over resources of a task/tid.
+ *
+ * BPF_TASK_ITER_TGID
+ *	Iterate over resources of every task of a process / task group.
+ */
+enum bpf_iter_task_type {
+	BPF_TASK_ITER_ALL = 0,
+	BPF_TASK_ITER_TID,
+	BPF_TASK_ITER_TGID,
+};
+
 struct bpf_iter_aux_info {
 	/* for map_elem iter */
 	struct bpf_map *map;
@@ -1805,6 +1826,10 @@ struct bpf_iter_aux_info {
 		struct cgroup *start; /* starting cgroup */
 		enum bpf_cgroup_iter_order order;
 	} cgroup;
+	struct {
+		enum bpf_iter_task_type	type;
+		u32 pid;
+	} task;
 };
 
 typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
include/uapi/linux/bpf.h

@@ -110,6 +110,12 @@ union bpf_iter_link_info {
 		__u32	cgroup_fd;
 		__u64	cgroup_id;
 	} cgroup;
+	/* Parameters of task iterators. */
+	struct {
+		__u32	tid;
+		__u32	pid;
+		__u32	pid_fd;
+	} task;
 };
 
 /* BPF syscall commands, see bpf(2) man-page for more details. */
kernel/bpf/task_iter.c

@@ -12,6 +12,9 @@
 
 struct bpf_iter_seq_task_common {
 	struct pid_namespace *ns;
+	enum bpf_iter_task_type	type;
+	u32 pid;
+	u32 pid_visiting;
 };
 
 struct bpf_iter_seq_task_info {
@@ -22,18 +25,115 @@ struct bpf_iter_seq_task_info {
 	u32 tid;
 };
 
-static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
+static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_common *common,
+						   u32 *tid,
+						   bool skip_if_dup_files)
+{
+	struct task_struct *task, *next_task;
+	struct pid *pid;
+	u32 saved_tid;
+
+	if (!*tid) {
+		/* The first time, the iterator calls this function. */
+		pid = find_pid_ns(common->pid, common->ns);
+		if (!pid)
+			return NULL;
+
+		task = get_pid_task(pid, PIDTYPE_TGID);
+		if (!task)
+			return NULL;
+
+		*tid = common->pid;
+		common->pid_visiting = common->pid;
+
+		return task;
+	}
+
+	/* If the control returns to user space and comes back to the
+	 * kernel again, *tid and common->pid_visiting should be the
+	 * same for task_seq_start() to pick up the correct task.
+	 */
+	if (*tid == common->pid_visiting) {
+		pid = find_pid_ns(common->pid_visiting, common->ns);
+		task = get_pid_task(pid, PIDTYPE_PID);
+
+		return task;
+	}
+
+	pid = find_pid_ns(common->pid_visiting, common->ns);
+	if (!pid)
+		return NULL;
+
+	task = get_pid_task(pid, PIDTYPE_PID);
+	if (!task)
+		return NULL;
+
+retry:
+	if (!pid_alive(task)) {
+		put_task_struct(task);
+		return NULL;
+	}
+
+	next_task = next_thread(task);
+	put_task_struct(task);
+	if (!next_task)
+		return NULL;
+
+	saved_tid = *tid;
+	*tid = __task_pid_nr_ns(next_task, PIDTYPE_PID, common->ns);
+	if (!*tid || *tid == common->pid) {
+		/* Run out of tasks of a process.  The tasks of a
+		 * thread_group are linked as circular linked list.
+		 */
+		*tid = saved_tid;
+		return NULL;
+	}
+
+	get_task_struct(next_task);
+	common->pid_visiting = *tid;
+
+	if (skip_if_dup_files && task->files == task->group_leader->files) {
+		task = next_task;
+		goto retry;
+	}
+
+	return next_task;
+}
+
+static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common,
 					     u32 *tid,
 					     bool skip_if_dup_files)
 {
 	struct task_struct *task = NULL;
 	struct pid *pid;
 
+	if (common->type == BPF_TASK_ITER_TID) {
+		if (*tid && *tid != common->pid)
+			return NULL;
+		rcu_read_lock();
+		pid = find_pid_ns(common->pid, common->ns);
+		if (pid) {
+			task = get_pid_task(pid, PIDTYPE_TGID);
+			*tid = common->pid;
+		}
+		rcu_read_unlock();
+
+		return task;
+	}
+
+	if (common->type == BPF_TASK_ITER_TGID) {
+		rcu_read_lock();
+		task = task_group_seq_get_next(common, tid, skip_if_dup_files);
+		rcu_read_unlock();
+
+		return task;
+	}
+
 	rcu_read_lock();
 retry:
-	pid = find_ge_pid(*tid, ns);
+	pid = find_ge_pid(*tid, common->ns);
 	if (pid) {
-		*tid = pid_nr_ns(pid, ns);
+		*tid = pid_nr_ns(pid, common->ns);
 		task = get_pid_task(pid, PIDTYPE_PID);
 		if (!task) {
 			++*tid;
@@ -56,7 +156,7 @@ static void *task_seq_start(struct seq_file *seq, loff_t *pos)
 	struct bpf_iter_seq_task_info *info = seq->private;
 	struct task_struct *task;
 
-	task = task_seq_get_next(info->common.ns, &info->tid, false);
+	task = task_seq_get_next(&info->common, &info->tid, false);
 	if (!task)
 		return NULL;
 
@@ -73,7 +173,7 @@ static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	++*pos;
 	++info->tid;
 	put_task_struct((struct task_struct *)v);
-	task = task_seq_get_next(info->common.ns, &info->tid, false);
+	task = task_seq_get_next(&info->common, &info->tid, false);
 	if (!task)
 		return NULL;
 
@@ -117,6 +217,41 @@ static void task_seq_stop(struct seq_file *seq, void *v)
 		put_task_struct((struct task_struct *)v);
 }
 
+static int bpf_iter_attach_task(struct bpf_prog *prog,
+				union bpf_iter_link_info *linfo,
+				struct bpf_iter_aux_info *aux)
+{
+	unsigned int flags;
+	struct pid *pid;
+	pid_t tgid;
+
+	if ((!!linfo->task.tid + !!linfo->task.pid + !!linfo->task.pid_fd) > 1)
+		return -EINVAL;
+
+	aux->task.type = BPF_TASK_ITER_ALL;
+	if (linfo->task.tid != 0) {
+		aux->task.type = BPF_TASK_ITER_TID;
+		aux->task.pid = linfo->task.tid;
+	}
+	if (linfo->task.pid != 0) {
+		aux->task.type = BPF_TASK_ITER_TGID;
+		aux->task.pid = linfo->task.pid;
+	}
+	if (linfo->task.pid_fd != 0) {
+		aux->task.type = BPF_TASK_ITER_TGID;
+
+		pid = pidfd_get_pid(linfo->task.pid_fd, &flags);
+		if (IS_ERR(pid))
+			return PTR_ERR(pid);
+
+		tgid = pid_nr_ns(pid, task_active_pid_ns(current));
+		aux->task.pid = tgid;
+		put_pid(pid);
+	}
+
+	return 0;
+}
+
 static const struct seq_operations task_seq_ops = {
 	.start	= task_seq_start,
 	.next	= task_seq_next,
@@ -137,8 +272,7 @@ struct bpf_iter_seq_task_file_info {
 static struct file *
 task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
 {
-	struct pid_namespace *ns = info->common.ns;
-	u32 curr_tid = info->tid;
+	u32 saved_tid = info->tid;
 	struct task_struct *curr_task;
 	unsigned int curr_fd = info->fd;
 
@@ -151,21 +285,18 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
 		curr_task = info->task;
 		curr_fd = info->fd;
 	} else {
-                curr_task = task_seq_get_next(ns, &curr_tid, true);
+		curr_task = task_seq_get_next(&info->common, &info->tid, true);
                 if (!curr_task) {
                         info->task = NULL;
-                        info->tid = curr_tid;
                         return NULL;
                 }
 
-                /* set info->task and info->tid */
+		/* set info->task */
 		info->task = curr_task;
-		if (curr_tid == info->tid) {
+		if (saved_tid == info->tid)
 			curr_fd = info->fd;
-		} else {
-			info->tid = curr_tid;
+		else
 			curr_fd = 0;
-		}
 	}
 
 	rcu_read_lock();
@@ -186,9 +317,15 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
 	/* the current task is done, go to the next task */
 	rcu_read_unlock();
 	put_task_struct(curr_task);
+
+	if (info->common.type == BPF_TASK_ITER_TID) {
+		info->task = NULL;
+		return NULL;
+	}
+
 	info->task = NULL;
 	info->fd = 0;
-	curr_tid = ++(info->tid);
+	saved_tid = ++(info->tid);
 	goto again;
 }
 
@@ -269,6 +406,9 @@ static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
 	struct bpf_iter_seq_task_common *common = priv_data;
 
 	common->ns = get_pid_ns(task_active_pid_ns(current));
+	common->type = aux->task.type;
+	common->pid = aux->task.pid;
+
 	return 0;
 }
 
@@ -307,11 +447,10 @@ enum bpf_task_vma_iter_find_op {
 static struct vm_area_struct *
 task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
 {
-	struct pid_namespace *ns = info->common.ns;
 	enum bpf_task_vma_iter_find_op op;
 	struct vm_area_struct *curr_vma;
 	struct task_struct *curr_task;
-	u32 curr_tid = info->tid;
+	u32 saved_tid = info->tid;
 
 	/* If this function returns a non-NULL vma, it holds a reference to
 	 * the task_struct, and holds read lock on vma->mm->mmap_lock.
@@ -371,14 +510,13 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
 		}
 	} else {
 again:
-		curr_task = task_seq_get_next(ns, &curr_tid, true);
+		curr_task = task_seq_get_next(&info->common, &info->tid, true);
 		if (!curr_task) {
-			info->tid = curr_tid + 1;
+			info->tid++;
 			goto finish;
 		}
 
-		if (curr_tid != info->tid) {
-			info->tid = curr_tid;
+		if (saved_tid != info->tid) {
 			/* new task, process the first vma */
 			op = task_vma_iter_first_vma;
 		} else {
@@ -430,9 +568,12 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
 	return curr_vma;
 
 next_task:
+	if (info->common.type == BPF_TASK_ITER_TID)
+		goto finish;
+
 	put_task_struct(curr_task);
 	info->task = NULL;
-	curr_tid++;
+	info->tid++;
 	goto again;
 
 finish:
@@ -533,6 +674,7 @@ static const struct bpf_iter_seq_info task_seq_info = {
 
 static struct bpf_iter_reg task_reg_info = {
 	.target			= "task",
+	.attach_target		= bpf_iter_attach_task,
 	.feature		= BPF_ITER_RESCHED,
 	.ctx_arg_info_size	= 1,
 	.ctx_arg_info		= {
@@ -551,6 +693,7 @@ static const struct bpf_iter_seq_info task_file_seq_info = {
 
 static struct bpf_iter_reg task_file_reg_info = {
 	.target			= "task_file",
+	.attach_target		= bpf_iter_attach_task,
 	.feature		= BPF_ITER_RESCHED,
 	.ctx_arg_info_size	= 2,
 	.ctx_arg_info		= {
@@ -571,6 +714,7 @@ static const struct bpf_iter_seq_info task_vma_seq_info = {
 
 static struct bpf_iter_reg task_vma_reg_info = {
 	.target			= "task_vma",
+	.attach_target		= bpf_iter_attach_task,
 	.feature		= BPF_ITER_RESCHED,
 	.ctx_arg_info_size	= 2,
 	.ctx_arg_info		= {
tools/include/uapi/linux/bpf.h

@@ -110,6 +110,12 @@ union bpf_iter_link_info {
 		__u32	cgroup_fd;
 		__u64	cgroup_id;
 	} cgroup;
+	/* Parameters of task iterators. */
+	struct {
+		__u32	tid;
+		__u32	pid;
+		__u32	pid_fd;
+	} task;
 };
 
 /* BPF syscall commands, see bpf(2) man-page for more details. */