616db8779b (mirror of https://github.com/torvalds/linux.git)

If a per-cpu work item hogs the CPU, it can prevent other work items from
starting through concurrency management. A per-cpu workqueue that intends
to host such CPU-hogging work items can opt out of concurrency management
by setting %WQ_CPU_INTENSIVE; however, this is error-prone and, when the
flag is missed, difficult to debug.
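
For illustration, a per-cpu workqueue that knows its work items are CPU
hogs would opt out at allocation time roughly as follows (the workqueue
name and variables are hypothetical; alloc_workqueue() and
%WQ_CPU_INTENSIVE are the existing API):

/* hypothetical example: a per-cpu workqueue that opts out of concurrency
 * management because its work items are known CPU hogs */
static struct workqueue_struct *example_hog_wq;

static int __init example_init(void)
{
	example_hog_wq = alloc_workqueue("example_hog", WQ_CPU_INTENSIVE, 0);
	if (!example_hog_wq)
		return -ENOMEM;
	return 0;
}
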
This patch adds automatic detection based on CPU usage. If a
concurrency-managed work item consumes more CPU time than the threshold
(10ms by default) continuously without intervening sleeps, wq_worker_tick(),
which is called from scheduler_tick(), detects the condition and
automatically marks it CPU_INTENSIVE.
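
Roughly, the tick-side check works as in the following simplified sketch.
worker_set_flags(), need_more_worker() and wake_up_worker() are existing
workqueue.c internals; the exact bookkeeping in the real patch may differ:

void wq_worker_tick(struct task_struct *task)
{
	struct worker *worker = kthread_data(task);
	struct worker_pool *pool = worker->pool;

	/* nothing to do if the worker isn't executing a work item */
	if (!worker->current_work)
		return;

	/*
	 * current_at holds the worker's accumulated runtime sampled when
	 * the current work item started (or when the worker last woke up).
	 * Bail if the worker is already excluded from concurrency
	 * management or the runtime delta is still below the threshold.
	 */
	if ((worker->flags & WORKER_NOT_RUNNING) ||
	    task->se.sum_exec_runtime - worker->current_at <
	    wq_cpu_intensive_thresh_us * NSEC_PER_USEC)
		return;

	raw_spin_lock(&pool->lock);

	/* exclude the offending worker from concurrency management ... */
	worker_set_flags(worker, WORKER_CPU_INTENSIVE);

	/* ... and wake another worker if work items are now stalled */
	if (need_more_worker(pool))
		wake_up_worker(pool);

	raw_spin_unlock(&pool->lock);
}

The threshold is the new wq_cpu_intensive_thresh_us knob, which should be
settable at boot via workqueue.cpu_intensive_thresh_us= (in microseconds;
see the kernel-parameters.txt note below).
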
The mechanism isn't foolproof:
* Detection depends on the tick hitting the work item. Getting preempted
  at the right times may allow a violating work item to evade detection,
  at least temporarily.
* nohz_full CPUs may not be running the tick, so detection can fail on
  them.
* Even when detection is working, the 10ms detection delays can add up if
  many CPU-hogging work items are queued at the same time.
However, in the vast majority of cases, this should detect violations
reliably and provide reasonable protection with a small increase in code
complexity.
If some work items trigger this condition repeatedly, the bigger problem
is likely that the CPU is being saturated with such per-cpu work items,
and the solution is to make them UNBOUND (see the sketch below). The next
patch adds a debug mechanism to help spot such cases.
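
Making them unbound is a one-flag change at allocation time; a minimal,
hypothetical sketch:

/* hypothetical: host the heavy work items on an unbound workqueue so they
 * are scheduled like regular threads instead of tying up the per-cpu pool */
heavy_wq = alloc_workqueue("example_heavy", WQ_UNBOUND, 0);
queue_work(heavy_wq, &heavy_work);
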
v4: Documentation for workqueue.cpu_intensive_thresh_us added to
    kernel-parameters.txt.
v3: Switched to wq_worker_tick() instead of hooking into preemptions, as
    suggested by Peter.
v2: Lai pointed out that wq_worker_stopping() also needs to be called from
    the preemption and rtlock paths, and an earlier patch was updated
    accordingly. This patch adds a comment describing the risk of infinite
    recursion and how it is avoided.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>

kernel/workqueue_internal.h (83 lines, 2.5 KiB, C)

/* SPDX-License-Identifier: GPL-2.0 */
/*
 * kernel/workqueue_internal.h
 *
 * Workqueue internal header file.  Only to be included by workqueue and
 * core kernel subsystems.
 */
#ifndef _KERNEL_WORKQUEUE_INTERNAL_H
#define _KERNEL_WORKQUEUE_INTERNAL_H

#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/preempt.h>

struct worker_pool;

/*
 * The poor guys doing the actual heavy lifting.  All on-duty workers are
 * either serving the manager role, on idle list or on busy hash.  For
 * details on the locking annotation (L, I, X...), refer to workqueue.c.
 *
 * Only to be used in workqueue and async.
 */
struct worker {
	/* on idle list while idle, on busy hash table while busy */
	union {
		struct list_head	entry;	/* L: while idle */
		struct hlist_node	hentry;	/* L: while busy */
	};

	struct work_struct	*current_work;	/* K: work being processed and its */
	work_func_t		current_func;	/* K: function */
	struct pool_workqueue	*current_pwq;	/* K: pwq */
	u64			current_at;	/* K: runtime at start or last wakeup */
	unsigned int		current_color;	/* K: color */

	int			sleeping;	/* S: is worker sleeping? */

	/* used by the scheduler to determine a worker's last known identity */
	work_func_t		last_func;	/* K: last work's fn */

	struct list_head	scheduled;	/* L: scheduled works */

	struct task_struct	*task;		/* I: worker task */
	struct worker_pool	*pool;		/* A: the associated pool */
						/* L: for rescuers */
	struct list_head	node;		/* A: anchored at pool->workers */
						/* A: runs through worker->node */

	unsigned long		last_active;	/* K: last active timestamp */
	unsigned int		flags;		/* X: flags */
	int			id;		/* I: worker id */

	/*
	 * Opaque string set with work_set_desc().  Printed out with task
	 * dump for debugging - WARN, BUG, panic or sysrq.
	 */
	char			desc[WORKER_DESC_LEN];

	/* used only by rescuers to point to the target workqueue */
	struct workqueue_struct	*rescue_wq;	/* I: the workqueue to rescue */
};

/**
 * current_wq_worker - return struct worker if %current is a workqueue worker
 */
static inline struct worker *current_wq_worker(void)
{
	if (in_task() && (current->flags & PF_WQ_WORKER))
		return kthread_data(current);
	return NULL;
}

/*
 * Scheduler hooks for concurrency managed workqueue.  Only to be used from
 * sched/ and workqueue.c.
 */
void wq_worker_running(struct task_struct *task);
void wq_worker_sleeping(struct task_struct *task);
void wq_worker_tick(struct task_struct *task);
work_func_t wq_worker_last_func(struct task_struct *task);

#endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
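
For context, the wq_worker_tick() hook declared above is meant to be
invoked from the scheduler's tick path for tasks flagged as workqueue
workers, along these lines (a sketch of the sched/core.c side, not the
verbatim change):

	/* in scheduler_tick(), after the current task's runtime is updated */
	if (curr->flags & PF_WQ_WORKER)
		wq_worker_tick(curr);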