forked from mirrors/linux
		
	kthread: Don't allocate kthread_struct for init and umh
If kthread_is_per_cpu runs concurrently with free_kthread_struct the kthread_struct that was just freed may be read from. This bug was introduced by commit 40966e316f86 ("kthread: Ensure struct kthread is present for all kthreads"), when kthread_struct started to be allocated for all tasks that have PF_KTHREAD set. This in turn required the kthread_struct to be freed in kernel_execve and violated the assumption that kthread_struct will have the same lifetime as the task. Looking a bit deeper this only applies to callers of kernel_execve which is just the init process and the user mode helper processes. These processes really don't want to be kernel threads but are for historical reasons. Mostly that copy_thread does not know how to take a kernel mode function to the process for processes without PF_KTHREAD or PF_IO_WORKER set. Solve this by not allocating kthread_struct for the init process and the user mode helper processes. This is done by adding a kthread member to struct kernel_clone_args, setting kthread in fork_idle and kernel_thread, adding user_mode_thread that works like kernel_thread except it does not set kthread, and in fork only allocating the kthread_struct if .kthread is set. I have looked at kernel/kthread.c and since commit 40966e316f86 ("kthread: Ensure struct kthread is present for all kthreads") there have been no assumptions added that to_kthread or __to_kthread will not return NULL. There are a few callers of to_kthread or __to_kthread that assume a non-NULL struct kthread pointer will be returned. These functions are kthread_data(), kthread_parkme(), kthread_exit(), kthread(), kthread_park(), kthread_unpark(), kthread_stop(). All of those functions can reasonably be expected to be called when it is known that a task is a kthread, so that assumption seems reasonable. 
Cc: stable@vger.kernel.org Fixes: 40966e316f86 ("kthread: Ensure struct kthread is present for all kthreads") Reported-by: Максим Кутявин <maximkabox13@gmail.com> Link: https://lkml.kernel.org/r/20220506141512.516114-1-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
This commit is contained in:
		
							parent
							
								
									3123109284
								
							
						
					
					
						commit
						343f4c49f2
					
				
					 5 changed files with 30 additions and 8 deletions
				
			
		| 
						 | 
					@ -1308,8 +1308,6 @@ int begin_new_exec(struct linux_binprm * bprm)
 | 
				
			||||||
	if (retval)
 | 
						if (retval)
 | 
				
			||||||
		goto out_unlock;
 | 
							goto out_unlock;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (me->flags & PF_KTHREAD)
 | 
					 | 
				
			||||||
		free_kthread_struct(me);
 | 
					 | 
				
			||||||
	me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
 | 
						me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
 | 
				
			||||||
					PF_NOFREEZE | PF_NO_SETAFFINITY);
 | 
										PF_NOFREEZE | PF_NO_SETAFFINITY);
 | 
				
			||||||
	flush_thread();
 | 
						flush_thread();
 | 
				
			||||||
| 
						 | 
					@ -1955,6 +1953,10 @@ int kernel_execve(const char *kernel_filename,
 | 
				
			||||||
	int fd = AT_FDCWD;
 | 
						int fd = AT_FDCWD;
 | 
				
			||||||
	int retval;
 | 
						int retval;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (WARN_ON_ONCE((current->flags & PF_KTHREAD) &&
 | 
				
			||||||
 | 
								(current->worker_private)))
 | 
				
			||||||
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	filename = getname_kernel(kernel_filename);
 | 
						filename = getname_kernel(kernel_filename);
 | 
				
			||||||
	if (IS_ERR(filename))
 | 
						if (IS_ERR(filename))
 | 
				
			||||||
		return PTR_ERR(filename);
 | 
							return PTR_ERR(filename);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -32,6 +32,7 @@ struct kernel_clone_args {
 | 
				
			||||||
	size_t set_tid_size;
 | 
						size_t set_tid_size;
 | 
				
			||||||
	int cgroup;
 | 
						int cgroup;
 | 
				
			||||||
	int io_thread;
 | 
						int io_thread;
 | 
				
			||||||
 | 
						int kthread;
 | 
				
			||||||
	struct cgroup *cgrp;
 | 
						struct cgroup *cgrp;
 | 
				
			||||||
	struct css_set *cset;
 | 
						struct css_set *cset;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
| 
						 | 
					@ -89,6 +90,7 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
 | 
				
			||||||
struct task_struct *fork_idle(int);
 | 
					struct task_struct *fork_idle(int);
 | 
				
			||||||
struct mm_struct *copy_init_mm(void);
 | 
					struct mm_struct *copy_init_mm(void);
 | 
				
			||||||
extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 | 
					extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 | 
				
			||||||
 | 
					extern pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags);
 | 
				
			||||||
extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
 | 
					extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
 | 
				
			||||||
int kernel_wait(pid_t pid, int *stat);
 | 
					int kernel_wait(pid_t pid, int *stat);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -688,7 +688,7 @@ noinline void __ref rest_init(void)
 | 
				
			||||||
	 * the init task will end up wanting to create kthreads, which, if
 | 
						 * the init task will end up wanting to create kthreads, which, if
 | 
				
			||||||
	 * we schedule it before we create kthreadd, will OOPS.
 | 
						 * we schedule it before we create kthreadd, will OOPS.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	pid = kernel_thread(kernel_init, NULL, CLONE_FS);
 | 
						pid = user_mode_thread(kernel_init, NULL, CLONE_FS);
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Pin init on the boot CPU. Task migration is not properly working
 | 
						 * Pin init on the boot CPU. Task migration is not properly working
 | 
				
			||||||
	 * until sched_init_smp() has been run. It will set the allowed
 | 
						 * until sched_init_smp() has been run. It will set the allowed
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2157,7 +2157,7 @@ static __latent_entropy struct task_struct *copy_process(
 | 
				
			||||||
	p->io_context = NULL;
 | 
						p->io_context = NULL;
 | 
				
			||||||
	audit_set_context(p, NULL);
 | 
						audit_set_context(p, NULL);
 | 
				
			||||||
	cgroup_fork(p);
 | 
						cgroup_fork(p);
 | 
				
			||||||
	if (p->flags & PF_KTHREAD) {
 | 
						if (args->kthread) {
 | 
				
			||||||
		if (!set_kthread_struct(p))
 | 
							if (!set_kthread_struct(p))
 | 
				
			||||||
			goto bad_fork_cleanup_delayacct;
 | 
								goto bad_fork_cleanup_delayacct;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -2548,7 +2548,8 @@ struct task_struct * __init fork_idle(int cpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct task_struct *task;
 | 
						struct task_struct *task;
 | 
				
			||||||
	struct kernel_clone_args args = {
 | 
						struct kernel_clone_args args = {
 | 
				
			||||||
		.flags = CLONE_VM,
 | 
							.flags		= CLONE_VM,
 | 
				
			||||||
 | 
							.kthread	= 1,
 | 
				
			||||||
	};
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args);
 | 
						task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args);
 | 
				
			||||||
| 
						 | 
					@ -2679,6 +2680,23 @@ pid_t kernel_clone(struct kernel_clone_args *args)
 | 
				
			||||||
 * Create a kernel thread.
 | 
					 * Create a kernel thread.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 | 
					pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct kernel_clone_args args = {
 | 
				
			||||||
 | 
							.flags		= ((lower_32_bits(flags) | CLONE_VM |
 | 
				
			||||||
 | 
									    CLONE_UNTRACED) & ~CSIGNAL),
 | 
				
			||||||
 | 
							.exit_signal	= (lower_32_bits(flags) & CSIGNAL),
 | 
				
			||||||
 | 
							.stack		= (unsigned long)fn,
 | 
				
			||||||
 | 
							.stack_size	= (unsigned long)arg,
 | 
				
			||||||
 | 
							.kthread	= 1,
 | 
				
			||||||
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return kernel_clone(&args);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Create a user mode thread.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct kernel_clone_args args = {
 | 
						struct kernel_clone_args args = {
 | 
				
			||||||
		.flags		= ((lower_32_bits(flags) | CLONE_VM |
 | 
							.flags		= ((lower_32_bits(flags) | CLONE_VM |
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -132,7 +132,7 @@ static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* If SIGCLD is ignored do_wait won't populate the status. */
 | 
						/* If SIGCLD is ignored do_wait won't populate the status. */
 | 
				
			||||||
	kernel_sigaction(SIGCHLD, SIG_DFL);
 | 
						kernel_sigaction(SIGCHLD, SIG_DFL);
 | 
				
			||||||
	pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD);
 | 
						pid = user_mode_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD);
 | 
				
			||||||
	if (pid < 0)
 | 
						if (pid < 0)
 | 
				
			||||||
		sub_info->retval = pid;
 | 
							sub_info->retval = pid;
 | 
				
			||||||
	else
 | 
						else
 | 
				
			||||||
| 
						 | 
					@ -171,8 +171,8 @@ static void call_usermodehelper_exec_work(struct work_struct *work)
 | 
				
			||||||
		 * want to pollute current->children, and we need a parent
 | 
							 * want to pollute current->children, and we need a parent
 | 
				
			||||||
		 * that always ignores SIGCHLD to ensure auto-reaping.
 | 
							 * that always ignores SIGCHLD to ensure auto-reaping.
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		pid = kernel_thread(call_usermodehelper_exec_async, sub_info,
 | 
							pid = user_mode_thread(call_usermodehelper_exec_async, sub_info,
 | 
				
			||||||
				    CLONE_PARENT | SIGCHLD);
 | 
									       CLONE_PARENT | SIGCHLD);
 | 
				
			||||||
		if (pid < 0) {
 | 
							if (pid < 0) {
 | 
				
			||||||
			sub_info->retval = pid;
 | 
								sub_info->retval = pid;
 | 
				
			||||||
			umh_complete(sub_info);
 | 
								umh_complete(sub_info);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue