	rcu: Make TASKS_RCU handle tasks that are almost done exiting
Once a task has passed exit_notify() in the do_exit() code path, it is
no longer on the task lists, and is therefore no longer visible to
rcu_tasks_kthread(). This means that an almost-exited task might be
preempted while within a trampoline, and this task won't be waited on
by rcu_tasks_kthread().

This commit fixes this bug by adding an srcu_struct. An exiting task
does srcu_read_lock() just before calling exit_notify(), and does the
corresponding srcu_read_unlock() after doing the final
preempt_disable(). This means that rcu_tasks_kthread() can do
synchronize_srcu() to wait for all mostly-exited tasks to reach their
final preempt_disable() region, and then use synchronize_sched() to
wait for those tasks to finish exiting.

Reported-by: Oleg Nesterov <oleg@redhat.com>
Suggested-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
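The handshake described above can be sketched outside of the exit path. The
following is a minimal, illustrative kernel-style fragment, not the actual
patch: the example_* names are hypothetical stand-ins for tasks_rcu_exit_srcu,
the do_exit() path, and rcu_tasks_kthread(); it uses the lockdep-aware
srcu_read_lock()/srcu_read_unlock() wrappers rather than the __-prefixed
variants the patch itself calls; and it assumes a kernel of this era, where
synchronize_sched() still exists.

#include <linux/srcu.h>
#include <linux/rcupdate.h>
#include <linux/preempt.h>

/* Hypothetical counterpart of tasks_rcu_exit_srcu. */
DEFINE_STATIC_SRCU(example_exit_srcu);

/*
 * Exit side: remain SRCU-visible from just before exit_notify() until
 * the final preempt_disable() region has been entered.  (Modeled on
 * do_exit(), which never returns, so preemption is never re-enabled.)
 */
static void example_exit_path(void)
{
	int idx;

	idx = srcu_read_lock(&example_exit_srcu);  /* just before exit_notify() */

	/*
	 * The task is now off the task lists but may still run, possibly
	 * preempted while within a trampoline.
	 */

	preempt_disable();                         /* final preempt_disable() */
	srcu_read_unlock(&example_exit_srcu, idx);

	/* The task schedules away for the last time and never runs again. */
}

/* Grace-period side: the ordering rcu_tasks_kthread() relies on. */
static void example_wait_for_exiting_tasks(void)
{
	/*
	 * Wait for every task that passed srcu_read_lock() above to reach
	 * its srcu_read_unlock(), i.e. its final preempt_disable() region.
	 */
	synchronize_srcu(&example_exit_srcu);

	/*
	 * Such tasks now run only with preemption disabled, so waiting for
	 * an RCU-sched grace period waits for them to finish exiting.
	 */
	synchronize_sched();
}

The order matters: synchronize_srcu() by itself only guarantees that
previously-exiting tasks have reached their final preempt_disable() region;
the subsequent synchronize_sched() is what waits for them to actually finish
running.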
parent 53c6d4edf8
commit 3f95aa81d2

3 changed files with 27 additions and 0 deletions
@@ -321,6 +321,8 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
  * macro rather than an inline function to avoid #include hell.
  */
 #ifdef CONFIG_TASKS_RCU
+#define TASKS_RCU(x) x
+extern struct srcu_struct tasks_rcu_exit_srcu;
 #define rcu_note_voluntary_context_switch(t) \
 	do { \
 		preempt_disable(); /* Exclude synchronize_sched(); */ \
@@ -329,6 +331,7 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
 		preempt_enable(); \
 	} while (0)
 #else /* #ifdef CONFIG_TASKS_RCU */
+#define TASKS_RCU(x) do { } while (0)
 #define rcu_note_voluntary_context_switch(t)	do { } while (0)
 #endif /* #else #ifdef CONFIG_TASKS_RCU */
 
@@ -667,6 +667,7 @@ void do_exit(long code)
 {
 	struct task_struct *tsk = current;
 	int group_dead;
+	TASKS_RCU(int tasks_rcu_i);
 
 	profile_task_exit(tsk);
 
@@ -775,6 +776,7 @@ void do_exit(long code)
 	 */
 	flush_ptrace_hw_breakpoint(tsk);
 
+	TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
 	exit_notify(tsk, group_dead);
 	proc_exit_connector(tsk);
 #ifdef CONFIG_NUMA
@@ -814,6 +816,7 @@ void do_exit(long code)
 	if (tsk->nr_dirtied)
 		__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
 	exit_rcu();
+	TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
 
 	/*
 	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
@@ -367,6 +367,13 @@ static struct rcu_head *rcu_tasks_cbs_head;
 static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
 static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
 
+/* Track exiting tasks in order to allow them to be waited for. */
+DEFINE_SRCU(tasks_rcu_exit_srcu);
+
+/* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
+static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 3;
+module_param(rcu_task_stall_timeout, int, 0644);
+
 /* Post an RCU-tasks callback. */
 void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
 {
@@ -517,6 +524,15 @@ static int __noreturn rcu_tasks_kthread(void *arg)
 		}
 		rcu_read_unlock();
 
+		/*
+		 * Wait for tasks that are in the process of exiting.
+		 * This does only part of the job, ensuring that all
+		 * tasks that were previously exiting reach the point
+		 * where they have disabled preemption, allowing the
+		 * later synchronize_sched() to finish the job.
+		 */
+		synchronize_srcu(&tasks_rcu_exit_srcu);
+
 		/*
 		 * Each pass through the following loop scans the list
 		 * of holdout tasks, removing any that are no longer
@@ -546,6 +562,11 @@ static int __noreturn rcu_tasks_kthread(void *arg)
 		 * ->rcu_tasks_holdout accesses to be within the grace
 		 * period, avoiding the need for memory barriers for
 		 * ->rcu_tasks_holdout accesses.
+		 *
+		 * In addition, this synchronize_sched() waits for exiting
+		 * tasks to complete their final preempt_disable() region
+		 * of execution, cleaning up after the synchronize_srcu()
+		 * above.
 		 */
 		synchronize_sched();
 