forked from mirrors/linux
		
	struct pid's count is an atomic_t field used as a refcount.  Use
refcount_t for it which is basically atomic_t but does additional
checking to prevent use-after-free bugs.
For memory ordering, the only change is with the following:
 -	if ((atomic_read(&pid->count) == 1) ||
 -	     atomic_dec_and_test(&pid->count)) {
 +	if (refcount_dec_and_test(&pid->count)) {
 		kmem_cache_free(ns->pid_cachep, pid);
Here the change is from: Fully ordered --> RELEASE + ACQUIRE (as per
refcount-vs-atomic.rst).  This ACQUIRE should take care of making sure the
free happens after the refcount_dec_and_test().
The above hunk also removes atomic_read() since it is not needed for the
code to work and it is unclear how beneficial it is.  The removal lets
refcount_dec_and_test() check for cases where get_pid() happened before
the object was freed.
Link: http://lkml.kernel.org/r/20190701183826.191936-1-joel@joelfernandes.org
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Reviewed-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Elena Reshetova <elena.reshetova@intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: KJ Tsanaktsidis <ktsanaktsidis@zendesk.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
		
	
			
		
			
				
	
	
		
			199 lines
		
	
	
	
		
			5.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			199 lines
		
	
	
	
		
			5.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* SPDX-License-Identifier: GPL-2.0 */
 | 
						|
#ifndef _LINUX_PID_H
 | 
						|
#define _LINUX_PID_H
 | 
						|
 | 
						|
#include <linux/rculist.h>
 | 
						|
#include <linux/wait.h>
 | 
						|
#include <linux/refcount.h>
 | 
						|
 | 
						|
/*
 * The kinds of identifier a struct pid can be attached as.  The values are
 * used as array indices (see tasks[] in struct pid), so they must stay
 * dense and start at 0.
 *
 * NOTE(review): the per-enumerator descriptions below are inferred from the
 * names - confirm against the users of each type.
 */
enum pid_type
{
	PIDTYPE_PID,	/* presumably: an individual task */
	PIDTYPE_TGID,	/* presumably: a thread group */
	PIDTYPE_PGID,	/* presumably: a process group */
	PIDTYPE_SID,	/* presumably: a session */
	PIDTYPE_MAX,	/* not a type: the number of types above */
};
 | 
						|
 | 
						|
/*
 * What is struct pid?
 *
 * A struct pid is the kernel's internal notion of a process identifier.
 * It refers to individual tasks, process groups, and sessions.  While
 * there are processes attached to it the struct pid lives in a hash
 * table, so it, and then the processes that it refers to, can be found
 * quickly from the numeric pid value.  The attached processes may be
 * quickly accessed by following pointers from struct pid.
 *
 * Storing pid_t values in the kernel and referring to them later has a
 * problem.  The process originally with that pid may have exited and the
 * pid allocator wrapped, and another process could have come along
 * and been assigned that pid.
 *
 * Referring to user space processes by holding a reference to struct
 * task_struct has a problem.  When the user space process exits
 * the now useless task_struct is still kept.  A task_struct plus a
 * stack consumes around 10K of low kernel memory.  More precisely
 * this is THREAD_SIZE + sizeof(struct task_struct).  By comparison
 * a struct pid is about 64 bytes.
 *
 * Holding a reference to struct pid solves both of these problems.
 * It is small so holding a reference does not consume a lot of
 * resources, and since a new struct pid is allocated when the numeric pid
 * value is reused (when pids wrap around) we don't mistakenly refer to new
 * processes.
 */
 | 
						|
 | 
						|
 | 
						|
/*
 * struct upid is used to get the id of the struct pid, as it is
 * seen in a particular namespace. Later the struct pid is found with
 * find_pid_ns() using the int nr and struct pid_namespace *ns.
 */
struct upid {
	int nr;				/* numeric id as seen in @ns */
	struct pid_namespace *ns;	/* namespace in which @nr is valid */
};
 | 
						|
 | 
						|
struct pid
 | 
						|
{
 | 
						|
	refcount_t count;
 | 
						|
	unsigned int level;
 | 
						|
	/* lists of tasks that use this pid */
 | 
						|
	struct hlist_head tasks[PIDTYPE_MAX];
 | 
						|
	/* wait queue for pidfd notifications */
 | 
						|
	wait_queue_head_t wait_pidfd;
 | 
						|
	struct rcu_head rcu;
 | 
						|
	struct upid numbers[1];
 | 
						|
};
 | 
						|
 | 
						|
/*
 * Objects defined elsewhere.
 * NOTE(review): judging by the names, init_struct_pid is the struct pid of
 * the initial task and pidfd_fops the file operations backing pidfds -
 * confirm at the definitions.
 */
extern struct pid init_struct_pid;

extern const struct file_operations pidfd_fops;
 | 
						|
 | 
						|
static inline struct pid *get_pid(struct pid *pid)
 | 
						|
{
 | 
						|
	if (pid)
 | 
						|
		refcount_inc(&pid->count);
 | 
						|
	return pid;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Reference and task accessors implemented out of line.
 * NOTE(review): put_pid() is presumably the counterpart that drops a
 * reference obtained with get_pid(); the get_*() variants presumably
 * return their result with a reference held - confirm at the definitions.
 */
extern void put_pid(struct pid *pid);
extern struct task_struct *pid_task(struct pid *pid, enum pid_type type);
extern struct task_struct *get_pid_task(struct pid *pid, enum pid_type type);

extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type);
 | 
						|
 | 
						|
/*
 * These helpers must be called with the tasklist_lock write-held.
 */
extern void attach_pid(struct task_struct *task, enum pid_type type);
extern void detach_pid(struct task_struct *task, enum pid_type type);
extern void change_pid(struct task_struct *task, enum pid_type type,
			struct pid *pid);
extern void transfer_pid(struct task_struct *old, struct task_struct *new,
			 enum pid_type type);
 | 
						|
 | 
						|
/* Only pointers to a pid namespace are needed here, so forward-declare it. */
struct pid_namespace;
extern struct pid_namespace init_pid_ns;
 | 
						|
 | 
						|
/*
 * Look up a PID in the hash table. Must be called with the tasklist_lock
 * or rcu_read_lock() held.
 *
 * find_pid_ns() finds the pid in the namespace specified.
 * find_vpid() finds the pid by its virtual id, i.e. in the current namespace.
 *
 * See also find_task_by_vpid() set in include/linux/sched.h
 */
extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns);
extern struct pid *find_vpid(int nr);
 | 
						|
 | 
						|
/*
 * Lookup a PID in the hash table, and return with its count elevated.
 */
extern struct pid *find_get_pid(int nr);
extern struct pid *find_ge_pid(int nr, struct pid_namespace *ns);

/* Allocation interface, implemented out of line. */
extern struct pid *alloc_pid(struct pid_namespace *ns);
extern void free_pid(struct pid *pid);
extern void disable_pid_allocation(struct pid_namespace *ns);
 | 
						|
 | 
						|
/*
 | 
						|
 * ns_of_pid() returns the pid namespace in which the specified pid was
 | 
						|
 * allocated.
 | 
						|
 *
 | 
						|
 * NOTE:
 | 
						|
 * 	ns_of_pid() is expected to be called for a process (task) that has
 | 
						|
 * 	an attached 'struct pid' (see attach_pid(), detach_pid()) i.e @pid
 | 
						|
 * 	is expected to be non-NULL. If @pid is NULL, caller should handle
 | 
						|
 * 	the resulting NULL pid-ns.
 | 
						|
 */
 | 
						|
static inline struct pid_namespace *ns_of_pid(struct pid *pid)
 | 
						|
{
 | 
						|
	struct pid_namespace *ns = NULL;
 | 
						|
	if (pid)
 | 
						|
		ns = pid->numbers[pid->level].ns;
 | 
						|
	return ns;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * is_child_reaper returns true if the pid is the init process
 | 
						|
 * of the current namespace. As this one could be checked before
 | 
						|
 * pid_ns->child_reaper is assigned in copy_process, we check
 | 
						|
 * with the pid number.
 | 
						|
 */
 | 
						|
static inline bool is_child_reaper(struct pid *pid)
 | 
						|
{
 | 
						|
	return pid->numbers[pid->level].nr == 1;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * the helpers to get the pid's id seen from different namespaces
 | 
						|
 *
 | 
						|
 * pid_nr()    : global id, i.e. the id seen from the init namespace;
 | 
						|
 * pid_vnr()   : virtual id, i.e. the id seen from the pid namespace of
 | 
						|
 *               current.
 | 
						|
 * pid_nr_ns() : id seen from the ns specified.
 | 
						|
 *
 | 
						|
 * see also task_xid_nr() etc in include/linux/sched.h
 | 
						|
 */
 | 
						|
 | 
						|
static inline pid_t pid_nr(struct pid *pid)
 | 
						|
{
 | 
						|
	pid_t nr = 0;
 | 
						|
	if (pid)
 | 
						|
		nr = pid->numbers[0].nr;
 | 
						|
	return nr;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Out-of-line id helpers:
 * pid_nr_ns() : id of @pid as seen from the namespace @ns.
 * pid_vnr()   : virtual id, i.e. the id seen from the pid namespace of
 *               current.
 */
pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns);
pid_t pid_vnr(struct pid *pid);
 | 
						|
 | 
						|
/*
 * do_each_pid_task() ... while_each_pid_task() bracket a loop body that is
 * run with @task set to each entry of @pid's tasks[@type] list, walked
 * with hlist_for_each_entry_rcu().  The two macros must always be used as
 * a pair: the first opens the do/if/loop scopes and the second closes
 * them.
 *
 * A NULL @pid results in no iterations.  When @type is PIDTYPE_PID the
 * walk stops after the first task.
 *
 * NOTE(review): being an _rcu list walk, this presumably needs
 * rcu_read_lock() or tasklist_lock held by the caller - confirm.
 */
#define do_each_pid_task(pid, type, task)				\
	do {								\
		if ((pid) != NULL)					\
			hlist_for_each_entry_rcu((task),		\
				&(pid)->tasks[type], pid_links[type]) {

			/*
			 * Both old and new leaders may be attached to
			 * the same pid in the middle of de_thread().
			 */
#define while_each_pid_task(pid, type, task)				\
				if ((type) == PIDTYPE_PID)		\
					break;				\
			}						\
	} while (0)
 | 
						|
 | 
						|
/*
 * do_each_pid_thread() ... while_each_pid_thread() bracket a loop body
 * that runs for every thread of every task visited by do_each_pid_task().
 * The two macros must always be used as a pair.
 *
 * For each task found by the outer walk, its value is saved in tg___ and
 * @task is then reused as the cursor of for_each_thread(tg___, task).
 * After the inner loop @task is restored from tg___ so that the outer
 * list walk continues from the right entry.
 */
#define do_each_pid_thread(pid, type, task)				\
	do_each_pid_task(pid, type, task) {				\
		struct task_struct *tg___ = (task);			\
		for_each_thread(tg___, task) {

#define while_each_pid_thread(pid, type, task)				\
		}							\
		(task) = tg___;						\
	} while_each_pid_task(pid, type, task)
 | 
						|
#endif /* _LINUX_PID_H */
 |