forked from mirrors/linux
		
	softlockup: make detector be aware of task switch of processes hogging cpu
Currently, the soft lockup detector warns only once for each case of process softlockup. However, the 'watchdog/n' thread may not always get the cpu in the time slot between the task switch of two processes hogging that cpu, so it never gets the chance to reset soft_watchdog_warn.

An example would be two processes hogging the cpu. Process A causes the softlockup warning and is killed manually by a user. Process B immediately becomes the new process hogging the cpu, preventing the softlockup code from resetting the soft_watchdog_warn variable.

This case is a false negative of "warn only once for a process", because a different process is now hogging the cpu and never gets reported.

Resolve this by saving the task pointer of the hogging process when the warning is printed, and checking it against the current task on later runs of the watchdog timer; if the two differ, reset soft_watchdog_warn too.

[dzickus@redhat.com: update comment]
Signed-off-by: chai wen <chaiw.fnst@cn.fujitsu.com>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									f775da2fc2
								
							
						
					
					
						commit
						b1a8de1f53
					
				
					 1 changed file with 17 additions and 1 deletion
				
			
		| 
						 | 
					@ -47,6 +47,7 @@ static DEFINE_PER_CPU(bool, softlockup_touch_sync);
 | 
				
			||||||
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
 | 
					static DEFINE_PER_CPU(bool, soft_watchdog_warn);
 | 
				
			||||||
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
 | 
					static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
 | 
				
			||||||
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
 | 
					static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
 | 
				
			||||||
 | 
					static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
 | 
				
			||||||
#ifdef CONFIG_HARDLOCKUP_DETECTOR
 | 
					#ifdef CONFIG_HARDLOCKUP_DETECTOR
 | 
				
			||||||
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 | 
					static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 | 
				
			||||||
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 | 
					static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 | 
				
			||||||
| 
						 | 
					@ -333,8 +334,22 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 | 
				
			||||||
			return HRTIMER_RESTART;
 | 
								return HRTIMER_RESTART;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/* only warn once */
 | 
							/* only warn once */
 | 
				
			||||||
		if (__this_cpu_read(soft_watchdog_warn) == true)
 | 
							if (__this_cpu_read(soft_watchdog_warn) == true) {
 | 
				
			||||||
 | 
								/*
 | 
				
			||||||
 | 
								 * When multiple processes are causing softlockups the
 | 
				
			||||||
 | 
								 * softlockup detector only warns on the first one
 | 
				
			||||||
 | 
								 * because the code relies on a full quiet cycle to
 | 
				
			||||||
 | 
								 * re-arm.  The second process prevents the quiet cycle
 | 
				
			||||||
 | 
								 * and never gets reported.  Use task pointers to detect
 | 
				
			||||||
 | 
								 * this.
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								if (__this_cpu_read(softlockup_task_ptr_saved) !=
 | 
				
			||||||
 | 
								    current) {
 | 
				
			||||||
 | 
									__this_cpu_write(soft_watchdog_warn, false);
 | 
				
			||||||
 | 
									__touch_watchdog();
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
			return HRTIMER_RESTART;
 | 
								return HRTIMER_RESTART;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (softlockup_all_cpu_backtrace) {
 | 
							if (softlockup_all_cpu_backtrace) {
 | 
				
			||||||
			/* Prevent multiple soft-lockup reports if one cpu is already
 | 
								/* Prevent multiple soft-lockup reports if one cpu is already
 | 
				
			||||||
| 
						 | 
					@ -350,6 +365,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 | 
				
			||||||
		pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
 | 
							pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
 | 
				
			||||||
			smp_processor_id(), duration,
 | 
								smp_processor_id(), duration,
 | 
				
			||||||
			current->comm, task_pid_nr(current));
 | 
								current->comm, task_pid_nr(current));
 | 
				
			||||||
 | 
							__this_cpu_write(softlockup_task_ptr_saved, current);
 | 
				
			||||||
		print_modules();
 | 
							print_modules();
 | 
				
			||||||
		print_irqtrace_events(current);
 | 
							print_irqtrace_events(current);
 | 
				
			||||||
		if (regs)
 | 
							if (regs)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue