forked from mirrors/linux
		
	cgroup: Use css_tryget() instead of css_tryget_online() in task_get_css()
A PF_EXITING task can stay associated with an offline css.  If such a
task calls task_get_css(), css_tryget_online() keeps failing and the
retry loop never terminates, so the task gets stuck indefinitely.  This
can be triggered by BSD process accounting, which writes to a file with
PF_EXITING set, when racing against memcg disable, as in the backtrace
below.
After this change, task_get_css() may return a css which was already
offline when the function was called.  None of the existing users are
affected by this change.
  INFO: rcu_sched self-detected stall on CPU
  INFO: rcu_sched detected stalls on CPUs/tasks:
  ...
  NMI backtrace for cpu 0
  ...
  Call Trace:
   <IRQ>
   dump_stack+0x46/0x68
   nmi_cpu_backtrace.cold.2+0x13/0x57
   nmi_trigger_cpumask_backtrace+0xba/0xca
   rcu_dump_cpu_stacks+0x9e/0xce
   rcu_check_callbacks.cold.74+0x2af/0x433
   update_process_times+0x28/0x60
   tick_sched_timer+0x34/0x70
   __hrtimer_run_queues+0xee/0x250
   hrtimer_interrupt+0xf4/0x210
   smp_apic_timer_interrupt+0x56/0x110
   apic_timer_interrupt+0xf/0x20
   </IRQ>
  RIP: 0010:balance_dirty_pages_ratelimited+0x28f/0x3d0
  ...
   btrfs_file_write_iter+0x31b/0x563
   __vfs_write+0xfa/0x140
   __kernel_write+0x4f/0x100
   do_acct_process+0x495/0x580
   acct_process+0xb9/0xdb
   do_exit+0x748/0xa00
   do_group_exit+0x3a/0xa0
   get_signal+0x254/0x560
   do_signal+0x23/0x5c0
   exit_to_usermode_loop+0x5d/0xa0
   prepare_exit_to_usermode+0x53/0x80
   retint_user+0x8/0x8
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: stable@vger.kernel.org # v4.2+
Fixes: ec438699a9 ("cgroup, block: implement task_get_css() and use it in bio_associate_current()")
			
			
This commit is contained in:
		
							parent
							
								
									9fb67d643f
								
							
						
					
					
						commit
						18fa84a2db
					
				
					 1 changed files with 8 additions and 2 deletions
				
			
		|  | @ -487,7 +487,7 @@ static inline struct cgroup_subsys_state *task_css(struct task_struct *task, | ||||||
|  * |  * | ||||||
|  * Find the css for the (@task, @subsys_id) combination, increment a |  * Find the css for the (@task, @subsys_id) combination, increment a | ||||||
|  * reference on and return it.  This function is guaranteed to return a |  * reference on and return it.  This function is guaranteed to return a | ||||||
|  * valid css. |  * valid css.  The returned css may already have been offlined. | ||||||
|  */ |  */ | ||||||
| static inline struct cgroup_subsys_state * | static inline struct cgroup_subsys_state * | ||||||
| task_get_css(struct task_struct *task, int subsys_id) | task_get_css(struct task_struct *task, int subsys_id) | ||||||
|  | @ -497,7 +497,13 @@ task_get_css(struct task_struct *task, int subsys_id) | ||||||
| 	rcu_read_lock(); | 	rcu_read_lock(); | ||||||
| 	while (true) { | 	while (true) { | ||||||
| 		css = task_css(task, subsys_id); | 		css = task_css(task, subsys_id); | ||||||
| 		if (likely(css_tryget_online(css))) | 		/* | ||||||
|  | 		 * Can't use css_tryget_online() here.  A task which has | ||||||
|  | 		 * PF_EXITING set may stay associated with an offline css. | ||||||
|  | 		 * If such a task calls this function, css_tryget_online() | ||||||
|  | 		 * will keep failing. | ||||||
|  | 		 */ | ||||||
|  | 		if (likely(css_tryget(css))) | ||||||
| 			break; | 			break; | ||||||
| 		cpu_relax(); | 		cpu_relax(); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Tejun Heo
						Tejun Heo