mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	kernel-6.15-rc1.tasklist_lock
-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZ90q3QAKCRCRxhvAZXjc
 oh2TAP9vrH7ft0TpJN2RyFNels/QaYmoiuw4TdVZhbEzvCUyYgEA1Bnx+OGmkdbT
 e4W3NkWJpn8BBjHfz3z3P7SImDdcCAw=
 =a6/i
 -----END PGP SIGNATURE-----
Merge tag 'kernel-6.15-rc1.tasklist_lock' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull tasklist_lock optimizations from Christian Brauner:
 "According to the performance testbots this brings a 23% performance
  increase when creating new processes:
   - Reduce tasklist_lock hold time on exit:
       - Perform add_device_randomness() without tasklist_lock
       - Perform free_pid() calls outside of tasklist_lock
   - Drop irq disablement around pidmap_lock
   - Add some tasklist_lock asserts
   - Call flush_sigqueue() lockless by changing release_task()
   - Don't pointlessly clear TIF_SIGPENDING in __exit_signal() ->
     clear_tsk_thread_flag()"
* tag 'kernel-6.15-rc1.tasklist_lock' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  pid: drop irq disablement around pidmap_lock
  pid: perform free_pid() calls outside of tasklist_lock
  pid: sprinkle tasklist_lock asserts
  exit: hoist get_pid() in release_task() outside of tasklist_lock
  exit: perform add_device_randomness() without tasklist_lock
  exit: kill the pointless __exit_signal()->clear_tsk_thread_flag(TIF_SIGPENDING)
  exit: change the release_task() paths to call flush_sigqueue() lockless
			
			
This commit is contained in:
		
						commit
						b0cb56cbbd
					
				
					 4 changed files with 93 additions and 66 deletions
				
			
		| 
						 | 
					@ -101,9 +101,9 @@ extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type);
 | 
				
			||||||
 * these helpers must be called with the tasklist_lock write-held.
 | 
					 * these helpers must be called with the tasklist_lock write-held.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
extern void attach_pid(struct task_struct *task, enum pid_type);
 | 
					extern void attach_pid(struct task_struct *task, enum pid_type);
 | 
				
			||||||
extern void detach_pid(struct task_struct *task, enum pid_type);
 | 
					void detach_pid(struct pid **pids, struct task_struct *task, enum pid_type);
 | 
				
			||||||
extern void change_pid(struct task_struct *task, enum pid_type,
 | 
					void change_pid(struct pid **pids, struct task_struct *task, enum pid_type,
 | 
				
			||||||
			struct pid *pid);
 | 
							struct pid *pid);
 | 
				
			||||||
extern void exchange_tids(struct task_struct *task, struct task_struct *old);
 | 
					extern void exchange_tids(struct task_struct *task, struct task_struct *old);
 | 
				
			||||||
extern void transfer_pid(struct task_struct *old, struct task_struct *new,
 | 
					extern void transfer_pid(struct task_struct *old, struct task_struct *new,
 | 
				
			||||||
			 enum pid_type);
 | 
								 enum pid_type);
 | 
				
			||||||
| 
						 | 
					@ -129,6 +129,7 @@ extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
 | 
				
			||||||
extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 | 
					extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 | 
				
			||||||
			     size_t set_tid_size);
 | 
								     size_t set_tid_size);
 | 
				
			||||||
extern void free_pid(struct pid *pid);
 | 
					extern void free_pid(struct pid *pid);
 | 
				
			||||||
 | 
					void free_pids(struct pid **pids);
 | 
				
			||||||
extern void disable_pid_allocation(struct pid_namespace *ns);
 | 
					extern void disable_pid_allocation(struct pid_namespace *ns);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -123,14 +123,22 @@ static __init int kernel_exit_sysfs_init(void)
 | 
				
			||||||
late_initcall(kernel_exit_sysfs_init);
 | 
					late_initcall(kernel_exit_sysfs_init);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void __unhash_process(struct task_struct *p, bool group_dead)
 | 
					/*
 | 
				
			||||||
 | 
					 * For things release_task() would like to do *after* tasklist_lock is released.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					struct release_task_post {
 | 
				
			||||||
 | 
						struct pid *pids[PIDTYPE_MAX];
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void __unhash_process(struct release_task_post *post, struct task_struct *p,
 | 
				
			||||||
 | 
								     bool group_dead)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	nr_threads--;
 | 
						nr_threads--;
 | 
				
			||||||
	detach_pid(p, PIDTYPE_PID);
 | 
						detach_pid(post->pids, p, PIDTYPE_PID);
 | 
				
			||||||
	if (group_dead) {
 | 
						if (group_dead) {
 | 
				
			||||||
		detach_pid(p, PIDTYPE_TGID);
 | 
							detach_pid(post->pids, p, PIDTYPE_TGID);
 | 
				
			||||||
		detach_pid(p, PIDTYPE_PGID);
 | 
							detach_pid(post->pids, p, PIDTYPE_PGID);
 | 
				
			||||||
		detach_pid(p, PIDTYPE_SID);
 | 
							detach_pid(post->pids, p, PIDTYPE_SID);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		list_del_rcu(&p->tasks);
 | 
							list_del_rcu(&p->tasks);
 | 
				
			||||||
		list_del_init(&p->sibling);
 | 
							list_del_init(&p->sibling);
 | 
				
			||||||
| 
						 | 
					@ -142,7 +150,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * This function expects the tasklist_lock write-locked.
 | 
					 * This function expects the tasklist_lock write-locked.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static void __exit_signal(struct task_struct *tsk)
 | 
					static void __exit_signal(struct release_task_post *post, struct task_struct *tsk)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct signal_struct *sig = tsk->signal;
 | 
						struct signal_struct *sig = tsk->signal;
 | 
				
			||||||
	bool group_dead = thread_group_leader(tsk);
 | 
						bool group_dead = thread_group_leader(tsk);
 | 
				
			||||||
| 
						 | 
					@ -175,9 +183,6 @@ static void __exit_signal(struct task_struct *tsk)
 | 
				
			||||||
			sig->curr_target = next_thread(tsk);
 | 
								sig->curr_target = next_thread(tsk);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
 | 
					 | 
				
			||||||
			      sizeof(unsigned long long));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Accumulate here the counters for all threads as they die. We could
 | 
						 * Accumulate here the counters for all threads as they die. We could
 | 
				
			||||||
	 * skip the group leader because it is the last user of signal_struct,
 | 
						 * skip the group leader because it is the last user of signal_struct,
 | 
				
			||||||
| 
						 | 
					@ -198,23 +203,15 @@ static void __exit_signal(struct task_struct *tsk)
 | 
				
			||||||
	task_io_accounting_add(&sig->ioac, &tsk->ioac);
 | 
						task_io_accounting_add(&sig->ioac, &tsk->ioac);
 | 
				
			||||||
	sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 | 
						sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 | 
				
			||||||
	sig->nr_threads--;
 | 
						sig->nr_threads--;
 | 
				
			||||||
	__unhash_process(tsk, group_dead);
 | 
						__unhash_process(post, tsk, group_dead);
 | 
				
			||||||
	write_sequnlock(&sig->stats_lock);
 | 
						write_sequnlock(&sig->stats_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * Do this under ->siglock, we can race with another thread
 | 
					 | 
				
			||||||
	 * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	flush_sigqueue(&tsk->pending);
 | 
					 | 
				
			||||||
	tsk->sighand = NULL;
 | 
						tsk->sighand = NULL;
 | 
				
			||||||
	spin_unlock(&sighand->siglock);
 | 
						spin_unlock(&sighand->siglock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	__cleanup_sighand(sighand);
 | 
						__cleanup_sighand(sighand);
 | 
				
			||||||
	clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
 | 
						if (group_dead)
 | 
				
			||||||
	if (group_dead) {
 | 
					 | 
				
			||||||
		flush_sigqueue(&sig->shared_pending);
 | 
					 | 
				
			||||||
		tty_kref_put(tty);
 | 
							tty_kref_put(tty);
 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void delayed_put_task_struct(struct rcu_head *rhp)
 | 
					static void delayed_put_task_struct(struct rcu_head *rhp)
 | 
				
			||||||
| 
						 | 
					@ -240,10 +237,13 @@ void __weak release_thread(struct task_struct *dead_task)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void release_task(struct task_struct *p)
 | 
					void release_task(struct task_struct *p)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
						struct release_task_post post;
 | 
				
			||||||
	struct task_struct *leader;
 | 
						struct task_struct *leader;
 | 
				
			||||||
	struct pid *thread_pid;
 | 
						struct pid *thread_pid;
 | 
				
			||||||
	int zap_leader;
 | 
						int zap_leader;
 | 
				
			||||||
repeat:
 | 
					repeat:
 | 
				
			||||||
 | 
						memset(&post, 0, sizeof(post));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* don't need to get the RCU readlock here - the process is dead and
 | 
						/* don't need to get the RCU readlock here - the process is dead and
 | 
				
			||||||
	 * can't be modifying its own credentials. But shut RCU-lockdep up */
 | 
						 * can't be modifying its own credentials. But shut RCU-lockdep up */
 | 
				
			||||||
	rcu_read_lock();
 | 
						rcu_read_lock();
 | 
				
			||||||
| 
						 | 
					@ -253,10 +253,11 @@ void release_task(struct task_struct *p)
 | 
				
			||||||
	pidfs_exit(p);
 | 
						pidfs_exit(p);
 | 
				
			||||||
	cgroup_release(p);
 | 
						cgroup_release(p);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						thread_pid = get_pid(p->thread_pid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	write_lock_irq(&tasklist_lock);
 | 
						write_lock_irq(&tasklist_lock);
 | 
				
			||||||
	ptrace_release_task(p);
 | 
						ptrace_release_task(p);
 | 
				
			||||||
	thread_pid = get_pid(p->thread_pid);
 | 
						__exit_signal(&post, p);
 | 
				
			||||||
	__exit_signal(p);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * If we are the last non-leader member of the thread
 | 
						 * If we are the last non-leader member of the thread
 | 
				
			||||||
| 
						 | 
					@ -280,7 +281,20 @@ void release_task(struct task_struct *p)
 | 
				
			||||||
	write_unlock_irq(&tasklist_lock);
 | 
						write_unlock_irq(&tasklist_lock);
 | 
				
			||||||
	proc_flush_pid(thread_pid);
 | 
						proc_flush_pid(thread_pid);
 | 
				
			||||||
	put_pid(thread_pid);
 | 
						put_pid(thread_pid);
 | 
				
			||||||
 | 
						add_device_randomness(&p->se.sum_exec_runtime,
 | 
				
			||||||
 | 
								      sizeof(p->se.sum_exec_runtime));
 | 
				
			||||||
 | 
						free_pids(post.pids);
 | 
				
			||||||
	release_thread(p);
 | 
						release_thread(p);
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * This task was already removed from the process/thread/pid lists
 | 
				
			||||||
 | 
						 * and lock_task_sighand(p) can't succeed. Nobody else can touch
 | 
				
			||||||
 | 
						 * ->pending or, if group dead, signal->shared_pending. We can call
 | 
				
			||||||
 | 
						 * flush_sigqueue() lockless.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						flush_sigqueue(&p->pending);
 | 
				
			||||||
 | 
						if (thread_group_leader(p))
 | 
				
			||||||
 | 
							flush_sigqueue(&p->signal->shared_pending);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	put_task_struct_rcu_user(p);
 | 
						put_task_struct_rcu_user(p);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	p = leader;
 | 
						p = leader;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										82
									
								
								kernel/pid.c
									
									
									
									
									
								
							
							
						
						
									
										82
									
								
								kernel/pid.c
									
									
									
									
									
								
							| 
						 | 
					@ -88,20 +88,6 @@ struct pid_namespace init_pid_ns = {
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
EXPORT_SYMBOL_GPL(init_pid_ns);
 | 
					EXPORT_SYMBOL_GPL(init_pid_ns);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
 * Note: disable interrupts while the pidmap_lock is held as an
 | 
					 | 
				
			||||||
 * interrupt might come in and do read_lock(&tasklist_lock).
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * If we don't disable interrupts there is a nasty deadlock between
 | 
					 | 
				
			||||||
 * detach_pid()->free_pid() and another cpu that does
 | 
					 | 
				
			||||||
 * spin_lock(&pidmap_lock) followed by an interrupt routine that does
 | 
					 | 
				
			||||||
 * read_lock(&tasklist_lock);
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * After we clean up the tasklist_lock and know there are no
 | 
					 | 
				
			||||||
 * irq handlers that take it we can leave the interrupts enabled.
 | 
					 | 
				
			||||||
 * For now it is easier to be safe than to prove it can't happen.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
 | 
					static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
 | 
				
			||||||
seqcount_spinlock_t pidmap_lock_seq = SEQCNT_SPINLOCK_ZERO(pidmap_lock_seq, &pidmap_lock);
 | 
					seqcount_spinlock_t pidmap_lock_seq = SEQCNT_SPINLOCK_ZERO(pidmap_lock_seq, &pidmap_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -128,11 +114,11 @@ static void delayed_put_pid(struct rcu_head *rhp)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void free_pid(struct pid *pid)
 | 
					void free_pid(struct pid *pid)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	/* We can be called with write_lock_irq(&tasklist_lock) held */
 | 
					 | 
				
			||||||
	int i;
 | 
						int i;
 | 
				
			||||||
	unsigned long flags;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spin_lock_irqsave(&pidmap_lock, flags);
 | 
						lockdep_assert_not_held(&tasklist_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock(&pidmap_lock);
 | 
				
			||||||
	for (i = 0; i <= pid->level; i++) {
 | 
						for (i = 0; i <= pid->level; i++) {
 | 
				
			||||||
		struct upid *upid = pid->numbers + i;
 | 
							struct upid *upid = pid->numbers + i;
 | 
				
			||||||
		struct pid_namespace *ns = upid->ns;
 | 
							struct pid_namespace *ns = upid->ns;
 | 
				
			||||||
| 
						 | 
					@ -155,11 +141,23 @@ void free_pid(struct pid *pid)
 | 
				
			||||||
		idr_remove(&ns->idr, upid->nr);
 | 
							idr_remove(&ns->idr, upid->nr);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	pidfs_remove_pid(pid);
 | 
						pidfs_remove_pid(pid);
 | 
				
			||||||
	spin_unlock_irqrestore(&pidmap_lock, flags);
 | 
						spin_unlock(&pidmap_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	call_rcu(&pid->rcu, delayed_put_pid);
 | 
						call_rcu(&pid->rcu, delayed_put_pid);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void free_pids(struct pid **pids)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int tmp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * This can batch pidmap_lock.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						for (tmp = PIDTYPE_MAX; --tmp >= 0; )
 | 
				
			||||||
 | 
							if (pids[tmp])
 | 
				
			||||||
 | 
								free_pid(pids[tmp]);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 | 
					struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 | 
				
			||||||
		      size_t set_tid_size)
 | 
							      size_t set_tid_size)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -211,7 +209,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		idr_preload(GFP_KERNEL);
 | 
							idr_preload(GFP_KERNEL);
 | 
				
			||||||
		spin_lock_irq(&pidmap_lock);
 | 
							spin_lock(&pidmap_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (tid) {
 | 
							if (tid) {
 | 
				
			||||||
			nr = idr_alloc(&tmp->idr, NULL, tid,
 | 
								nr = idr_alloc(&tmp->idr, NULL, tid,
 | 
				
			||||||
| 
						 | 
					@ -238,7 +236,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 | 
				
			||||||
			nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
 | 
								nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
 | 
				
			||||||
					      pid_max, GFP_ATOMIC);
 | 
										      pid_max, GFP_ATOMIC);
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		spin_unlock_irq(&pidmap_lock);
 | 
							spin_unlock(&pidmap_lock);
 | 
				
			||||||
		idr_preload_end();
 | 
							idr_preload_end();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (nr < 0) {
 | 
							if (nr < 0) {
 | 
				
			||||||
| 
						 | 
					@ -272,7 +270,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	upid = pid->numbers + ns->level;
 | 
						upid = pid->numbers + ns->level;
 | 
				
			||||||
	idr_preload(GFP_KERNEL);
 | 
						idr_preload(GFP_KERNEL);
 | 
				
			||||||
	spin_lock_irq(&pidmap_lock);
 | 
						spin_lock(&pidmap_lock);
 | 
				
			||||||
	if (!(ns->pid_allocated & PIDNS_ADDING))
 | 
						if (!(ns->pid_allocated & PIDNS_ADDING))
 | 
				
			||||||
		goto out_unlock;
 | 
							goto out_unlock;
 | 
				
			||||||
	pidfs_add_pid(pid);
 | 
						pidfs_add_pid(pid);
 | 
				
			||||||
| 
						 | 
					@ -281,18 +279,18 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 | 
				
			||||||
		idr_replace(&upid->ns->idr, pid, upid->nr);
 | 
							idr_replace(&upid->ns->idr, pid, upid->nr);
 | 
				
			||||||
		upid->ns->pid_allocated++;
 | 
							upid->ns->pid_allocated++;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	spin_unlock_irq(&pidmap_lock);
 | 
						spin_unlock(&pidmap_lock);
 | 
				
			||||||
	idr_preload_end();
 | 
						idr_preload_end();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return pid;
 | 
						return pid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
out_unlock:
 | 
					out_unlock:
 | 
				
			||||||
	spin_unlock_irq(&pidmap_lock);
 | 
						spin_unlock(&pidmap_lock);
 | 
				
			||||||
	idr_preload_end();
 | 
						idr_preload_end();
 | 
				
			||||||
	put_pid_ns(ns);
 | 
						put_pid_ns(ns);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
out_free:
 | 
					out_free:
 | 
				
			||||||
	spin_lock_irq(&pidmap_lock);
 | 
						spin_lock(&pidmap_lock);
 | 
				
			||||||
	while (++i <= ns->level) {
 | 
						while (++i <= ns->level) {
 | 
				
			||||||
		upid = pid->numbers + i;
 | 
							upid = pid->numbers + i;
 | 
				
			||||||
		idr_remove(&upid->ns->idr, upid->nr);
 | 
							idr_remove(&upid->ns->idr, upid->nr);
 | 
				
			||||||
| 
						 | 
					@ -302,7 +300,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 | 
				
			||||||
	if (ns->pid_allocated == PIDNS_ADDING)
 | 
						if (ns->pid_allocated == PIDNS_ADDING)
 | 
				
			||||||
		idr_set_cursor(&ns->idr, 0);
 | 
							idr_set_cursor(&ns->idr, 0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spin_unlock_irq(&pidmap_lock);
 | 
						spin_unlock(&pidmap_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	kmem_cache_free(ns->pid_cachep, pid);
 | 
						kmem_cache_free(ns->pid_cachep, pid);
 | 
				
			||||||
	return ERR_PTR(retval);
 | 
						return ERR_PTR(retval);
 | 
				
			||||||
| 
						 | 
					@ -310,9 +308,9 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void disable_pid_allocation(struct pid_namespace *ns)
 | 
					void disable_pid_allocation(struct pid_namespace *ns)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	spin_lock_irq(&pidmap_lock);
 | 
						spin_lock(&pidmap_lock);
 | 
				
			||||||
	ns->pid_allocated &= ~PIDNS_ADDING;
 | 
						ns->pid_allocated &= ~PIDNS_ADDING;
 | 
				
			||||||
	spin_unlock_irq(&pidmap_lock);
 | 
						spin_unlock(&pidmap_lock);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
 | 
					struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
 | 
				
			||||||
| 
						 | 
					@ -339,17 +337,23 @@ static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type)
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
void attach_pid(struct task_struct *task, enum pid_type type)
 | 
					void attach_pid(struct task_struct *task, enum pid_type type)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct pid *pid = *task_pid_ptr(task, type);
 | 
						struct pid *pid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						lockdep_assert_held_write(&tasklist_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						pid = *task_pid_ptr(task, type);
 | 
				
			||||||
	hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]);
 | 
						hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void __change_pid(struct task_struct *task, enum pid_type type,
 | 
					static void __change_pid(struct pid **pids, struct task_struct *task,
 | 
				
			||||||
			struct pid *new)
 | 
								 enum pid_type type, struct pid *new)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct pid **pid_ptr = task_pid_ptr(task, type);
 | 
						struct pid **pid_ptr, *pid;
 | 
				
			||||||
	struct pid *pid;
 | 
					 | 
				
			||||||
	int tmp;
 | 
						int tmp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						lockdep_assert_held_write(&tasklist_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						pid_ptr = task_pid_ptr(task, type);
 | 
				
			||||||
	pid = *pid_ptr;
 | 
						pid = *pid_ptr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	hlist_del_rcu(&task->pid_links[type]);
 | 
						hlist_del_rcu(&task->pid_links[type]);
 | 
				
			||||||
| 
						 | 
					@ -364,18 +368,19 @@ static void __change_pid(struct task_struct *task, enum pid_type type,
 | 
				
			||||||
		if (pid_has_task(pid, tmp))
 | 
							if (pid_has_task(pid, tmp))
 | 
				
			||||||
			return;
 | 
								return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	free_pid(pid);
 | 
						WARN_ON(pids[type]);
 | 
				
			||||||
 | 
						pids[type] = pid;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void detach_pid(struct task_struct *task, enum pid_type type)
 | 
					void detach_pid(struct pid **pids, struct task_struct *task, enum pid_type type)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	__change_pid(task, type, NULL);
 | 
						__change_pid(pids, task, type, NULL);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void change_pid(struct task_struct *task, enum pid_type type,
 | 
					void change_pid(struct pid **pids, struct task_struct *task, enum pid_type type,
 | 
				
			||||||
		struct pid *pid)
 | 
							struct pid *pid)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	__change_pid(task, type, pid);
 | 
						__change_pid(pids, task, type, pid);
 | 
				
			||||||
	attach_pid(task, type);
 | 
						attach_pid(task, type);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -386,6 +391,8 @@ void exchange_tids(struct task_struct *left, struct task_struct *right)
 | 
				
			||||||
	struct hlist_head *head1 = &pid1->tasks[PIDTYPE_PID];
 | 
						struct hlist_head *head1 = &pid1->tasks[PIDTYPE_PID];
 | 
				
			||||||
	struct hlist_head *head2 = &pid2->tasks[PIDTYPE_PID];
 | 
						struct hlist_head *head2 = &pid2->tasks[PIDTYPE_PID];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						lockdep_assert_held_write(&tasklist_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Swap the single entry tid lists */
 | 
						/* Swap the single entry tid lists */
 | 
				
			||||||
	hlists_swap_heads_rcu(head1, head2);
 | 
						hlists_swap_heads_rcu(head1, head2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -403,6 +410,7 @@ void transfer_pid(struct task_struct *old, struct task_struct *new,
 | 
				
			||||||
			   enum pid_type type)
 | 
								   enum pid_type type)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	WARN_ON_ONCE(type == PIDTYPE_PID);
 | 
						WARN_ON_ONCE(type == PIDTYPE_PID);
 | 
				
			||||||
 | 
						lockdep_assert_held_write(&tasklist_lock);
 | 
				
			||||||
	hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]);
 | 
						hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										14
									
								
								kernel/sys.c
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								kernel/sys.c
									
									
									
									
									
								
							| 
						 | 
					@ -1085,6 +1085,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct task_struct *p;
 | 
						struct task_struct *p;
 | 
				
			||||||
	struct task_struct *group_leader = current->group_leader;
 | 
						struct task_struct *group_leader = current->group_leader;
 | 
				
			||||||
 | 
						struct pid *pids[PIDTYPE_MAX] = { 0 };
 | 
				
			||||||
	struct pid *pgrp;
 | 
						struct pid *pgrp;
 | 
				
			||||||
	int err;
 | 
						int err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1142,13 +1143,14 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
 | 
				
			||||||
		goto out;
 | 
							goto out;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (task_pgrp(p) != pgrp)
 | 
						if (task_pgrp(p) != pgrp)
 | 
				
			||||||
		change_pid(p, PIDTYPE_PGID, pgrp);
 | 
							change_pid(pids, p, PIDTYPE_PGID, pgrp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	err = 0;
 | 
						err = 0;
 | 
				
			||||||
out:
 | 
					out:
 | 
				
			||||||
	/* All paths lead to here, thus we are safe. -DaveM */
 | 
						/* All paths lead to here, thus we are safe. -DaveM */
 | 
				
			||||||
	write_unlock_irq(&tasklist_lock);
 | 
						write_unlock_irq(&tasklist_lock);
 | 
				
			||||||
	rcu_read_unlock();
 | 
						rcu_read_unlock();
 | 
				
			||||||
 | 
						free_pids(pids);
 | 
				
			||||||
	return err;
 | 
						return err;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1222,21 +1224,22 @@ SYSCALL_DEFINE1(getsid, pid_t, pid)
 | 
				
			||||||
	return retval;
 | 
						return retval;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void set_special_pids(struct pid *pid)
 | 
					static void set_special_pids(struct pid **pids, struct pid *pid)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct task_struct *curr = current->group_leader;
 | 
						struct task_struct *curr = current->group_leader;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (task_session(curr) != pid)
 | 
						if (task_session(curr) != pid)
 | 
				
			||||||
		change_pid(curr, PIDTYPE_SID, pid);
 | 
							change_pid(pids, curr, PIDTYPE_SID, pid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (task_pgrp(curr) != pid)
 | 
						if (task_pgrp(curr) != pid)
 | 
				
			||||||
		change_pid(curr, PIDTYPE_PGID, pid);
 | 
							change_pid(pids, curr, PIDTYPE_PGID, pid);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int ksys_setsid(void)
 | 
					int ksys_setsid(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct task_struct *group_leader = current->group_leader;
 | 
						struct task_struct *group_leader = current->group_leader;
 | 
				
			||||||
	struct pid *sid = task_pid(group_leader);
 | 
						struct pid *sid = task_pid(group_leader);
 | 
				
			||||||
 | 
						struct pid *pids[PIDTYPE_MAX] = { 0 };
 | 
				
			||||||
	pid_t session = pid_vnr(sid);
 | 
						pid_t session = pid_vnr(sid);
 | 
				
			||||||
	int err = -EPERM;
 | 
						int err = -EPERM;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1252,13 +1255,14 @@ int ksys_setsid(void)
 | 
				
			||||||
		goto out;
 | 
							goto out;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	group_leader->signal->leader = 1;
 | 
						group_leader->signal->leader = 1;
 | 
				
			||||||
	set_special_pids(sid);
 | 
						set_special_pids(pids, sid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	proc_clear_tty(group_leader);
 | 
						proc_clear_tty(group_leader);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	err = session;
 | 
						err = session;
 | 
				
			||||||
out:
 | 
					out:
 | 
				
			||||||
	write_unlock_irq(&tasklist_lock);
 | 
						write_unlock_irq(&tasklist_lock);
 | 
				
			||||||
 | 
						free_pids(pids);
 | 
				
			||||||
	if (err > 0) {
 | 
						if (err > 0) {
 | 
				
			||||||
		proc_sid_connector(group_leader);
 | 
							proc_sid_connector(group_leader);
 | 
				
			||||||
		sched_autogroup_create_attach(group_leader);
 | 
							sched_autogroup_create_attach(group_leader);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue