Commit d812796eb3 (https://github.com/torvalds/linux.git)
There was no strong reason to flush, or not to flush, barrier work items in
flush_workqueue().  And we have to make barrier work items not participate
in nr_active, so we had been using WORK_NO_COLOR for them, which also makes
them impossible to flush via flush_workqueue().

And the users of flush_workqueue() often do not intend to wait for barrier work
items issued by flush_work().  That made the choice sound perfect.

But barrier work items hold a reference to an internal structure (pool_workqueue),
and the worker thread[s] is/are still busy for the workqueue user while the
barrier work items are not done.  So it is reasonable to make flush_workqueue()
also watch for flush_work() to make it more robust.
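
For context, a barrier work item is a small on-stack work item that flush_work()
queues right behind the work item being flushed; the flusher then sleeps on its
completion.  A minimal sketch of the mechanism, following the shape of struct
wq_barrier and wq_barrier_func() in kernel/workqueue.c (the informational task
field and the queueing details are omitted here):

	/* minimal sketch of flush_work()'s barrier; details abridged */
	struct wq_barrier {
		struct work_struct	work;	/* queued right after the target work item */
		struct completion	done;	/* the flusher waits on this */
	};

	static void wq_barrier_func(struct work_struct *work)
	{
		struct wq_barrier *barr = container_of(work, struct wq_barrier, work);

		complete(&barr->done);		/* wake up the flush_work() caller */
	}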
And a problem[1] reported by Li Zhe shows that we need such robustness.
The warning logs are listed below:
WARNING: CPU: 0 PID: 19336 at kernel/workqueue.c:4430 destroy_workqueue+0x11a/0x2f0
*****
destroy_workqueue: test_workqueue9 has the following busy pwq
  pwq 4: cpus=2 node=0 flags=0x0 nice=0 active=0/1 refcnt=2
      in-flight: 5658:wq_barrier_func
Showing busy workqueues and worker pools:
*****
It shows that even after drain_workqueue() returns, the barrier work item
is still in flight and the pwq (and a worker) is still busy on it.
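
destroy_workqueue()'s sanity check essentially asserts that every pwq of the
workqueue is idle once drain_workqueue() has returned.  As an illustration of
what "busy" means here, a rough sketch modeled on the pwq_busy() helper in
kernel/workqueue.c (details abridged):

	/* illustrative sketch: is this pool_workqueue still busy? */
	static bool pwq_busy(struct pool_workqueue *pwq)
	{
		int i;

		/* any work items still in flight, whatever their color? */
		for (i = 0; i < WORK_NR_COLORS; i++)
			if (pwq->nr_in_flight[i])
				return true;

		/* extra references, or queued work items left behind? */
		if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
			return true;
		if (pwq->nr_active || !list_empty(&pwq->inactive_works))
			return true;

		return false;
	}

In the report above it is the barrier work item, still running wq_barrier_func()
and still holding a pwq reference, that keeps the pwq busy.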
The problem is caused by flush_workqueue() not watching flush_work():
Thread A				Worker
					/* normal work item with linked barrier */
					process_scheduled_works()
destroy_workqueue()			  process_one_work()
  drain_workqueue()			    /* run normal work item */
				 /--	    pwq_dec_nr_in_flight()
    flush_workqueue()	    <---/
		/* the last normal work item is done */
  sanity_check				  process_one_work()
				       /--  raw_spin_unlock_irq(&pool->lock)
    raw_spin_lock_irq(&pool->lock)  <-/     /* maybe preempt */
    *WARNING*				    wq_barrier_func()
					    /* maybe preempt by cond_resched() */
Thread A can get the pool lock after the Worker unlocks the pool lock but before
running wq_barrier_func().  And if any preemption happens around
wq_barrier_func(), destroy_workqueue()'s sanity check is more likely to
get the lock and catch it.  (Note: preemption is not necessary to cause the bug;
the unlocking alone is enough to possibly trigger the WARNING.)
A simple solution might be to just execute all linked barrier work items
once, without releasing the pool lock, after the head work item's
pwq_dec_nr_in_flight().  But this solution has two problems:
  1) the head work item might itself be a barrier work item when the user-queued
     work item is cancelled. For example:
	thread 1:		thread 2:
	queue_work(wq, &my_work)
	flush_work(&my_work)
				cancel_work_sync(&my_work);
	/* Neither my_work nor the barrier work is scheduled. */
				destroy_workqueue(wq);
	/* This is an easier way to catch the WARNING. */
  2) there might be too many linked barrier work items, and running them
     all at once without releasing the pool lock just causes trouble.
The only solution is to make flush_workqueue() also watch barrier work
items.  So we have to assign a color to these barrier work items: the
color of the head (user-queued) work item.

Assigning a color doesn't cause any problem in active management, because
the previous patch made barrier work items not participate in nr_active
via WORK_STRUCT_INACTIVE rather than relying on the (old) WORK_NO_COLOR.
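
Concretely, the barrier work item now inherits the flush color of its head work
item when it is inserted, and is accounted in nr_in_flight[] under that color so
that flush_workqueue() waits for it too.  A rough sketch of the assignment, in the
spirit of insert_wq_barrier() in kernel/workqueue.c (the locals are those of that
function; locking, WORK_STRUCT_LINKED handling and barrier initialization omitted):

	/* sketch: give the barrier the head work item's flush color */
	if (worker) {
		/* the head work item is currently running on this worker */
		head = worker->scheduled.next;
		work_color = worker->current_color;
	} else {
		/* the head work item is still queued; read its color bits */
		head = target->entry.next;
		work_color = get_work_color(*work_data_bits(target));
	}

	/* the barrier is now visible to flush_workqueue() under that color */
	pwq->nr_in_flight[work_color]++;
	work_flags |= work_color_to_flags(work_color);

	insert_work(pwq, &barr->work, head, work_flags);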
[1]: https://lore.kernel.org/lkml/20210812083814.32453-1-lizhe.67@bytedance.com/
Reported-by: Li Zhe <lizhe.67@bytedance.com>
Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

kernel/workqueue_internal.h (81 lines, 2.4 KiB, C):
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * kernel/workqueue_internal.h
 *
 * Workqueue internal header file.  Only to be included by workqueue and
 * core kernel subsystems.
 */
#ifndef _KERNEL_WORKQUEUE_INTERNAL_H
#define _KERNEL_WORKQUEUE_INTERNAL_H

#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/preempt.h>

struct worker_pool;

/*
 * The poor guys doing the actual heavy lifting.  All on-duty workers are
 * either serving the manager role, on idle list or on busy hash.  For
 * details on the locking annotation (L, I, X...), refer to workqueue.c.
 *
 * Only to be used in workqueue and async.
 */
struct worker {
	/* on idle list while idle, on busy hash table while busy */
	union {
		struct list_head	entry;	/* L: while idle */
		struct hlist_node	hentry;	/* L: while busy */
	};

	struct work_struct	*current_work;	/* L: work being processed */
	work_func_t		current_func;	/* L: current_work's fn */
	struct pool_workqueue	*current_pwq;	/* L: current_work's pwq */
	unsigned int		current_color;	/* L: current_work's color */
	struct list_head	scheduled;	/* L: scheduled works */

	/* 64 bytes boundary on 64bit, 32 on 32bit */

	struct task_struct	*task;		/* I: worker task */
	struct worker_pool	*pool;		/* A: the associated pool */
						/* L: for rescuers */
	struct list_head	node;		/* A: anchored at pool->workers */
						/* A: runs through worker->node */

	unsigned long		last_active;	/* L: last active timestamp */
	unsigned int		flags;		/* X: flags */
	int			id;		/* I: worker id */
	int			sleeping;	/* None */

	/*
	 * Opaque string set with work_set_desc().  Printed out with task
	 * dump for debugging - WARN, BUG, panic or sysrq.
	 */
	char			desc[WORKER_DESC_LEN];

	/* used only by rescuers to point to the target workqueue */
	struct workqueue_struct	*rescue_wq;	/* I: the workqueue to rescue */

	/* used by the scheduler to determine a worker's last known identity */
	work_func_t		last_func;
};

/**
 * current_wq_worker - return struct worker if %current is a workqueue worker
 */
static inline struct worker *current_wq_worker(void)
{
	if (in_task() && (current->flags & PF_WQ_WORKER))
		return kthread_data(current);
	return NULL;
}

/*
 * Scheduler hooks for concurrency managed workqueue.  Only to be used from
 * sched/ and workqueue.c.
 */
void wq_worker_running(struct task_struct *task);
void wq_worker_sleeping(struct task_struct *task);
work_func_t wq_worker_last_func(struct task_struct *task);

#endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
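
For illustration, callers of current_wq_worker() are expected to handle a NULL
return when %current is not a workqueue worker.  A minimal usage sketch, modeled
on current_work() in kernel/workqueue.c:

/* return the work item %current is executing, or NULL if it is not a worker */
struct work_struct *current_work(void)
{
	struct worker *worker = current_wq_worker();

	return worker ? worker->current_work : NULL;
}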