rcu: Make TASKS_RCU handle tasks that are almost done exiting

Once a task has passed exit_notify() in the do_exit() code path, it
is no longer on the task lists, and is therefore no longer visible to
rcu_tasks_kthread().  This means that an almost-exited task might be
preempted while within a trampoline, and this task won't be waited on
by rcu_tasks_kthread().  This commit fixes this bug by adding an
srcu_struct.  An exiting task does srcu_read_lock() just before calling
exit_notify(), and does the corresponding srcu_read_unlock() after
doing the final preempt_disable().  This means that rcu_tasks_kthread()
can do synchronize_srcu() to wait for all mostly-exited tasks to reach
their final preempt_disable() region, and then use synchronize_sched()
to wait for those tasks to finish exiting.

Reported-by: Oleg Nesterov <oleg@redhat.com>
Suggested-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
parent 53c6d4edf8
commit 3f95aa81d2

3 changed files with 27 additions and 0 deletions
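In outline, the handshake this patch sets up looks roughly like the following. This is a minimal sketch of the ordering only (idx stands in for the patch's tasks_rcu_i local, and all surrounding do_exit() and rcu_tasks_kthread() logic is elided; see the diffs below for the actual changes):

	/* Exit side, in do_exit(): */
	idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
	exit_notify(tsk, group_dead);	/* Task becomes invisible to rcu_tasks_kthread(). */
	/* ... remainder of the exit path ... */
	exit_rcu();
	__srcu_read_unlock(&tasks_rcu_exit_srcu, idx);
	/* The final preempt_disable() region follows. */

	/* Grace-period side, in rcu_tasks_kthread(): */
	synchronize_srcu(&tasks_rcu_exit_srcu);	/* Exiting tasks reach their final preempt_disable(). */
	/* ... scan and wait out the holdout list ... */
	synchronize_sched();	/* Exiting tasks complete their preempt-disabled region. */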
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -321,6 +321,8 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
  * macro rather than an inline function to avoid #include hell.
  */
 #ifdef CONFIG_TASKS_RCU
+#define TASKS_RCU(x) x
+extern struct srcu_struct tasks_rcu_exit_srcu;
 #define rcu_note_voluntary_context_switch(t) \
 	do { \
 		preempt_disable(); /* Exclude synchronize_sched(); */ \
@@ -329,6 +331,7 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
 		preempt_enable(); \
 	} while (0)
 #else /* #ifdef CONFIG_TASKS_RCU */
+#define TASKS_RCU(x) do { } while (0)
 #define rcu_note_voluntary_context_switch(t)	do { } while (0)
 #endif /* #else #ifdef CONFIG_TASKS_RCU */
 
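The TASKS_RCU() wrapper defined above keeps #ifdef blocks out of do_exit(): with CONFIG_TASKS_RCU=y it expands its argument verbatim, and otherwise it expands to a no-op statement, so both the declaration and the SRCU calls vanish cleanly. A hypothetical use site (the function and variable names here are illustrative, not from the patch) compiles either way:

	static void example_exit_path(void)
	{
		TASKS_RCU(int idx);	/* Declares idx only when CONFIG_TASKS_RCU=y. */

		TASKS_RCU(idx = __srcu_read_lock(&tasks_rcu_exit_srcu));
		/* ... code during which this task must remain waitable by RCU-tasks ... */
		TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, idx));
	}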
diff --git a/kernel/exit.c b/kernel/exit.c
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -667,6 +667,7 @@ void do_exit(long code)
 {
 	struct task_struct *tsk = current;
 	int group_dead;
+	TASKS_RCU(int tasks_rcu_i);
 
 	profile_task_exit(tsk);
 
@@ -775,6 +776,7 @@ void do_exit(long code)
 	 */
 	flush_ptrace_hw_breakpoint(tsk);
 
+	TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
 	exit_notify(tsk, group_dead);
 	proc_exit_connector(tsk);
 #ifdef CONFIG_NUMA
@@ -814,6 +816,7 @@ void do_exit(long code)
 	if (tsk->nr_dirtied)
 		__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
 	exit_rcu();
+	TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
 
 	/*
 	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -367,6 +367,13 @@ static struct rcu_head *rcu_tasks_cbs_head;
 static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
 static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
 
+/* Track exiting tasks in order to allow them to be waited for. */
+DEFINE_SRCU(tasks_rcu_exit_srcu);
+
+/* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
+static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 3;
+module_param(rcu_task_stall_timeout, int, 0644);
+
 /* Post an RCU-tasks callback. */
 void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
 {
@@ -517,6 +524,15 @@ static int __noreturn rcu_tasks_kthread(void *arg)
 		}
 		rcu_read_unlock();
 
+		/*
+		 * Wait for tasks that are in the process of exiting.
+		 * This does only part of the job, ensuring that all
+		 * tasks that were previously exiting reach the point
+		 * where they have disabled preemption, allowing the
+		 * later synchronize_sched() to finish the job.
+		 */
+		synchronize_srcu(&tasks_rcu_exit_srcu);
+
 		/*
 		 * Each pass through the following loop scans the list
 		 * of holdout tasks, removing any that are no longer
@@ -546,6 +562,11 @@ static int __noreturn rcu_tasks_kthread(void *arg)
 		 * ->rcu_tasks_holdout accesses to be within the grace
 		 * period, avoiding the need for memory barriers for
 		 * ->rcu_tasks_holdout accesses.
+		 *
+		 * In addition, this synchronize_sched() waits for exiting
+		 * tasks to complete their final preempt_disable() region
+		 * of execution, cleaning up after the synchronize_srcu()
+		 * above.
 		 */
 		synchronize_sched();
 
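From a caller's perspective nothing changes: an RCU-tasks user still posts a callback and frees its trampoline once the grace period ends, and after this fix that grace period also covers tasks that had already passed exit_notify(). A sketch of such a caller, assuming a hypothetical my_tramp structure and free_my_tramp() helper (call_rcu_tasks() itself is the API shown in the diff above):

	struct my_tramp {
		struct rcu_head rh;
		void *text;		/* Trampoline code pages. */
	};

	static void my_tramp_free_cb(struct rcu_head *rhp)
	{
		struct my_tramp *t = container_of(rhp, struct my_tramp, rh);

		/*
		 * Safe to free: every task that might have been preempted
		 * inside the trampoline, including almost-exited tasks, has
		 * been waited for.
		 */
		free_my_tramp(t);
	}

	static void retire_my_tramp(struct my_tramp *t)
	{
		/* No new users exist; wait for all possible old ones. */
		call_rcu_tasks(&t->rh, my_tramp_free_cb);
	}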