mirror of https://github.com/torvalds/linux.git (synced 2025-11-04 10:40:15 +02:00)

	workqueue: Use a kthread_worker to release pool_workqueues
pool_workqueue release path is currently bounced to system_wq; however, this
is a bit tricky because this bouncing occurs while holding a pool lock and
thus has a risk of causing an A-A deadlock. This is currently addressed by
the fact that only unbound workqueues use this bouncing path and system_wq is
a per-cpu workqueue.

While this works, it's brittle and requires a work-around like setting the
lockdep subclass for the lock of unbound pools. Besides, future changes will
use the bouncing path for per-cpu workqueues too, making the current approach
unusable.

Let's just use a dedicated kthread_worker to untangle the dependency. This is
just one more kthread for all workqueues and makes the pwq release logic
simpler and more robust.

Signed-off-by: Tejun Heo <tj@kernel.org>
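For readers unfamiliar with the kthread_worker API this patch switches to,
here is a minimal, self-contained module-style sketch of the same bounce
pattern. It is illustrative only and not part of the commit; all identifiers
(release_worker, release_work, demo_lock, "demo_release", and friends) are
hypothetical stand-ins for pwq_release_worker, unbound_release_work, and
pool->lock.

	#include <linux/err.h>
	#include <linux/kthread.h>
	#include <linux/module.h>
	#include <linux/spinlock.h>

	static struct kthread_worker *release_worker;	/* stand-in for pwq_release_worker */
	static struct kthread_work release_work;	/* stand-in for unbound_release_work */
	static DEFINE_SPINLOCK(demo_lock);		/* stand-in for pool->lock */

	/* Runs later, on release_worker's own kthread; free to sleep. */
	static void release_workfn(struct kthread_work *work)
	{
		pr_info("releasing in process context\n");
	}

	static int __init demo_init(void)
	{
		release_worker = kthread_create_worker(0, "demo_release");
		if (IS_ERR(release_worker))
			return PTR_ERR(release_worker);
		kthread_init_work(&release_work, release_workfn);

		/*
		 * Queueing from under a spinlock is fine: kthread_queue_work()
		 * only links the item onto the worker's list. Because the
		 * worker is a dedicated kthread rather than another workqueue,
		 * nothing it executes can circle back and take demo_lock's
		 * lock class, which is the A-A hazard the commit removes.
		 */
		spin_lock(&demo_lock);
		kthread_queue_work(release_worker, &release_work);
		spin_unlock(&demo_lock);
		return 0;
	}

	static void __exit demo_exit(void)
	{
		kthread_destroy_worker(release_worker);	/* flushes pending work, then frees */
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");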
This commit is contained in:

parent fcecfa8f27
commit 967b494e2f
1 changed file with 23 additions and 17 deletions
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -257,12 +257,12 @@ struct pool_workqueue {
 	u64			stats[PWQ_NR_STATS];
 
 	/*
-	 * Release of unbound pwq is punted to system_wq.  See put_pwq()
-	 * and pwq_unbound_release_workfn() for details.  pool_workqueue
-	 * itself is also RCU protected so that the first pwq can be
-	 * determined without grabbing wq->mutex.
+	 * Release of unbound pwq is punted to a kthread_worker. See put_pwq()
+	 * and pwq_unbound_release_workfn() for details. pool_workqueue itself
+	 * is also RCU protected so that the first pwq can be determined without
+	 * grabbing wq->mutex.
 	 */
-	struct work_struct	unbound_release_work;
+	struct kthread_work	unbound_release_work;
 	struct rcu_head		rcu;
 } __aligned(1 << WORK_STRUCT_FLAG_BITS);
 
@@ -395,6 +395,13 @@ static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
 /* I: attributes used when instantiating ordered pools on demand */
 static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];
 
+/*
+ * I: kthread_worker to release pwq's. pwq release needs to be bounced to a
+ * process context while holding a pool lock. Bounce to a dedicated kthread
+ * worker to avoid A-A deadlocks.
+ */
+static struct kthread_worker *pwq_release_worker;
+
 struct workqueue_struct *system_wq __read_mostly;
 EXPORT_SYMBOL(system_wq);
 struct workqueue_struct *system_highpri_wq __read_mostly;
@@ -1366,14 +1373,10 @@ static void put_pwq(struct pool_workqueue *pwq)
 	if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
 		return;
 	/*
-	 * @pwq can't be released under pool->lock, bounce to
-	 * pwq_unbound_release_workfn().  This never recurses on the same
-	 * pool->lock as this path is taken only for unbound workqueues and
-	 * the release work item is scheduled on a per-cpu workqueue.  To
-	 * avoid lockdep warning, unbound pool->locks are given lockdep
-	 * subclass of 1 in get_unbound_pool().
+	 * @pwq can't be released under pool->lock, bounce to a dedicated
+	 * kthread_worker to avoid A-A deadlocks.
 	 */
-	schedule_work(&pwq->unbound_release_work);
+	kthread_queue_work(pwq_release_worker, &pwq->unbound_release_work);
 }
 
 /**
@@ -3965,7 +3968,6 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
 	if (!pool || init_worker_pool(pool) < 0)
 		goto fail;
 
-	lockdep_set_subclass(&pool->lock, 1);	/* see put_pwq() */
 	copy_workqueue_attrs(pool->attrs, attrs);
 	pool->node = target_node;
 
@@ -3999,10 +4001,10 @@ static void rcu_free_pwq(struct rcu_head *rcu)
 }
 
 /*
- * Scheduled on system_wq by put_pwq() when an unbound pwq hits zero refcnt
- * and needs to be destroyed.
+ * Scheduled on pwq_release_worker by put_pwq() when an unbound pwq hits zero
+ * refcnt and needs to be destroyed.
  */
-static void pwq_unbound_release_workfn(struct work_struct *work)
+static void pwq_unbound_release_workfn(struct kthread_work *work)
 {
 	struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
 						  unbound_release_work);
@@ -4110,7 +4112,8 @@ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
 	INIT_LIST_HEAD(&pwq->inactive_works);
 	INIT_LIST_HEAD(&pwq->pwqs_node);
 	INIT_LIST_HEAD(&pwq->mayday_node);
-	INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
+	kthread_init_work(&pwq->unbound_release_work,
+			  pwq_unbound_release_workfn);
 }
 
 /* sync @pwq with the current state of its associated wq and link it */
@@ -6433,6 +6436,9 @@ static void __init wq_cpu_intensive_thresh_init(void)
 	if (wq_cpu_intensive_thresh_us != ULONG_MAX)
 		return;
 
+	pwq_release_worker = kthread_create_worker(0, "pool_workqueue_release");
+	BUG_ON(IS_ERR(pwq_release_worker));
+
 	/*
 	 * The default of 10ms is derived from the fact that most modern (as of
 	 * 2023) processors can do a lot in 10ms and that it's just below what
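A side note on the last hunk: kthread_create_worker() reports failure as an
ERR_PTR-encoded pointer rather than NULL, which is why the new code checks
BUG_ON(IS_ERR(...)); at early boot, failure here is unrecoverable. A hedged
sketch of the same check in a caller that can unwind instead (the my_worker
names are hypothetical):

	#include <linux/err.h>
	#include <linux/kthread.h>

	static struct kthread_worker *my_worker;	/* hypothetical */

	static int start_my_worker(void)
	{
		my_worker = kthread_create_worker(0, "my_worker");
		if (IS_ERR(my_worker))
			return PTR_ERR(my_worker);	/* e.g. -ENOMEM */
		return 0;
	}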