mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	watchdog/softlockup: Fix cpu_stop_queue_work() double-queue bug
When scheduling is delayed for longer than the softlockup interrupt
period, the timer can fire again before the previously queued
cpu_stop_work has run, so it is possible to double-queue the
cpu_stop_work, causing list corruption.
Cure this by adding a per-CPU completion to track the cpu_stop_work's
progress: the timer only re-queues the work once the previous instance
has completed.
Reported-by: kernel test robot <lkp@intel.com>
Tested-by: Rong Chen <rong.a.chen@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 9cf57731b6 ("watchdog/softlockup: Replace "watchdog/%u" threads with cpu_stop_work")
Link: http://lkml.kernel.org/r/20180713104208.GW2494@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
			
			
This commit is contained in:
		
							parent
							
								
									fdf2ceb7f5
								
							
						
					
					
						commit
						be45bf5395
					
				
					 1 changed files with 15 additions and 5 deletions
				
			
		| 
						 | 
					@ -330,6 +330,9 @@ static void watchdog_interrupt_count(void)
 | 
				
			||||||
	__this_cpu_inc(hrtimer_interrupts);
 | 
						__this_cpu_inc(hrtimer_interrupts);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static DEFINE_PER_CPU(struct completion, softlockup_completion);
 | 
				
			||||||
 | 
					static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * The watchdog thread function - touches the timestamp.
 | 
					 * The watchdog thread function - touches the timestamp.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
| 
						 | 
					@ -343,12 +346,11 @@ static int softlockup_fn(void *data)
 | 
				
			||||||
	__this_cpu_write(soft_lockup_hrtimer_cnt,
 | 
						__this_cpu_write(soft_lockup_hrtimer_cnt,
 | 
				
			||||||
			 __this_cpu_read(hrtimer_interrupts));
 | 
								 __this_cpu_read(hrtimer_interrupts));
 | 
				
			||||||
	__touch_watchdog();
 | 
						__touch_watchdog();
 | 
				
			||||||
 | 
						complete(this_cpu_ptr(&softlockup_completion));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* watchdog kicker functions */
 | 
					/* watchdog kicker functions */
 | 
				
			||||||
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 | 
					static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -364,9 +366,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 | 
				
			||||||
	watchdog_interrupt_count();
 | 
						watchdog_interrupt_count();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* kick the softlockup detector */
 | 
						/* kick the softlockup detector */
 | 
				
			||||||
 | 
						if (completion_done(this_cpu_ptr(&softlockup_completion))) {
 | 
				
			||||||
 | 
							reinit_completion(this_cpu_ptr(&softlockup_completion));
 | 
				
			||||||
		stop_one_cpu_nowait(smp_processor_id(),
 | 
							stop_one_cpu_nowait(smp_processor_id(),
 | 
				
			||||||
				softlockup_fn, NULL,
 | 
									softlockup_fn, NULL,
 | 
				
			||||||
				this_cpu_ptr(&softlockup_stop_work));
 | 
									this_cpu_ptr(&softlockup_stop_work));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* .. and repeat */
 | 
						/* .. and repeat */
 | 
				
			||||||
	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
 | 
						hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
 | 
				
			||||||
| 
						 | 
					@ -467,9 +472,13 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 | 
				
			||||||
static void watchdog_enable(unsigned int cpu)
 | 
					static void watchdog_enable(unsigned int cpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
 | 
						struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
 | 
				
			||||||
 | 
						struct completion *done = this_cpu_ptr(&softlockup_completion);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	WARN_ON_ONCE(cpu != smp_processor_id());
 | 
						WARN_ON_ONCE(cpu != smp_processor_id());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						init_completion(done);
 | 
				
			||||||
 | 
						complete(done);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Start the timer first to prevent the NMI watchdog triggering
 | 
						 * Start the timer first to prevent the NMI watchdog triggering
 | 
				
			||||||
	 * before the timer has a chance to fire.
 | 
						 * before the timer has a chance to fire.
 | 
				
			||||||
| 
						 | 
					@ -499,6 +508,7 @@ static void watchdog_disable(unsigned int cpu)
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	watchdog_nmi_disable(cpu);
 | 
						watchdog_nmi_disable(cpu);
 | 
				
			||||||
	hrtimer_cancel(hrtimer);
 | 
						hrtimer_cancel(hrtimer);
 | 
				
			||||||
 | 
						wait_for_completion(this_cpu_ptr(&softlockup_completion));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int softlockup_stop_fn(void *data)
 | 
					static int softlockup_stop_fn(void *data)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue