forked from mirrors/linux
		
	sched/api: Introduce task_rcu_dereference() and try_get_task_struct()
Generally task_struct is only protected by RCU if it was found on an RCU-protected list (say, for_each_process() or find_task_by_vpid()). As Kirill pointed out, rq->curr isn't protected by RCU: the scheduler drops the (potentially) last reference without waiting for an RCU grace period, which means that we need to fix the code which uses foreign_rq->curr under rcu_read_lock(). Add a new helper which can be used to dereference rq->curr or any other pointer to task_struct, assuming that it should be cleared or updated before the final put_task_struct(). It returns non-NULL only if this task can't go away before rcu_read_unlock(). ( Also add try_get_task_struct() to make it easier to use this API correctly. ) Suggested-by: Kirill Tkhai <ktkhai@parallels.com> Signed-off-by: Oleg Nesterov <oleg@redhat.com> [ Updated comments; added try_get_task_struct()] Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Chris Metcalf <cmetcalf@ezchip.com> Cc: Christoph Lameter <cl@linux.com> Cc: Kirill Tkhai <tkhai@yandex.ru> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vladimir Davydov <vdavydov@parallels.com> Link: http://lkml.kernel.org/r/20160518170218.GY3192@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
		
							parent
							
								
									df55f462b9
								
							
						
					
					
						commit
						150593bf86
					
				
					 2 changed files with 79 additions and 0 deletions
				
			
		|  | @ -2139,6 +2139,9 @@ static inline void put_task_struct(struct task_struct *t) | |||
| 		__put_task_struct(t); | ||||
| } | ||||
| 
 | ||||
| struct task_struct *task_rcu_dereference(struct task_struct **ptask); /* returns NULL when *ptask is NULL or task->sighand was read as NULL */ | ||||
| struct task_struct *try_get_task_struct(struct task_struct **ptask); /* same lookup under rcu_read_lock(), plus get_task_struct() on success */ | ||||
| 
 | ||||
| #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | ||||
| extern void task_cputime(struct task_struct *t, | ||||
| 			 cputime_t *utime, cputime_t *stime); | ||||
|  |  | |||
|  | @ -210,6 +210,82 @@ void release_task(struct task_struct *p) | |||
| 		goto repeat; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Dereference *ptask (retrying if it changes underneath us) and return the | ||||
|  * task only if task->sighand was read as non-NULL. Returns NULL if *ptask | ||||
|  * is NULL or if sighand was read as NULL. try_get_task_struct() calls this | ||||
|  * under rcu_read_lock(). | ||||
|  * | ||||
|  * Note that if this function returns a valid task_struct pointer (!NULL), | ||||
|  * task->usage must remain >0 for the duration of the RCU critical section. | ||||
|  */ | ||||
| struct task_struct *task_rcu_dereference(struct task_struct **ptask) | ||||
| { | ||||
| 	struct sighand_struct *sighand; | ||||
| 	struct task_struct *task; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We need to verify that release_task() was not called and thus | ||||
| 	 * delayed_put_task_struct() can't run and drop the last reference | ||||
| 	 * before rcu_read_unlock(). We check task->sighand != NULL, | ||||
| 	 * but the memory we read may already have been freed and reused; | ||||
| 	 * the re-check of *ptask below deals with that. | ||||
| 	 */ | ||||
| retry: | ||||
| 	task = rcu_dereference(*ptask); | ||||
| 	if (!task) | ||||
| 		return NULL; | ||||
| 
 | ||||
| 	probe_kernel_address(&task->sighand, sighand); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Pairs with atomic_dec_and_test() in put_task_struct(). If this task | ||||
| 	 * was already freed we can not miss the preceding update of this | ||||
| 	 * pointer. | ||||
| 	 */ | ||||
| 	smp_rmb(); | ||||
| 	if (unlikely(task != READ_ONCE(*ptask))) | ||||
| 		goto retry; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We've re-checked that "task == *ptask", now we have two different | ||||
| 	 * cases: | ||||
| 	 * | ||||
| 	 * 1. This is actually the same task/task_struct. In this case | ||||
| 	 *    sighand != NULL tells us it is still alive. | ||||
| 	 * | ||||
| 	 * 2. This is another task which got the same memory for task_struct. | ||||
| 	 *    We can't know this of course, and we can not trust | ||||
| 	 *    sighand != NULL. | ||||
| 	 * | ||||
| 	 *    In this case we actually return a random value, but this is | ||||
| 	 *    correct. | ||||
| 	 * | ||||
| 	 *    If we return NULL - we can pretend that we actually noticed that | ||||
| 	 *    *ptask was updated when the previous task has exited. Or pretend | ||||
| 	 *    that probe_kernel_address(&task->sighand) reads NULL. | ||||
| 	 * | ||||
| 	 *    If we return the new task (because sighand is not NULL for any | ||||
| 	 *    reason) - this is fine too. This (new) task can't go away before | ||||
| 	 *    another gp pass. | ||||
| 	 * | ||||
| 	 *    And note: We could even eliminate this false NULL by re-reading | ||||
| 	 *    task->sighand once more. But this case is very unlikely so we | ||||
| 	 *    don't care. | ||||
| 	 */ | ||||
| 	if (!sighand) | ||||
| 		return NULL; | ||||
| 
 | ||||
| 	return task; | ||||
| } | ||||
| 
 | ||||
| struct task_struct *try_get_task_struct(struct task_struct **ptask) | ||||
| { | ||||
| 	struct task_struct *task; | ||||
| 
 | ||||
| 	rcu_read_lock(); | ||||
| 	task = task_rcu_dereference(ptask); | ||||
| 	if (task) | ||||
| 		get_task_struct(task); | ||||
| 	rcu_read_unlock(); | ||||
| 
 | ||||
| 	return task; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Determine if a process group is "orphaned", according to the POSIX | ||||
|  * definition in 2.2.2.52.  Orphaned process groups are not to be affected | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Oleg Nesterov
						Oleg Nesterov