mirror of https://github.com/torvalds/linux.git, synced 2025-11-04 10:40:15 +02:00

workqueue: map an unbound workqueue to multiple per-node pool_workqueues

Currently, an unbound workqueue has only one "current" pool_workqueue
associated with it.  It may have multiple pool_workqueues but only the
first pool_workqueue serves new work items.  For NUMA affinity, we
want to change this so that there are multiple current pool_workqueues
serving different NUMA nodes.
Introduce workqueue->numa_pwq_tbl[] which is indexed by NUMA node and
points to the pool_workqueue to use for each possible node.  This
replaces first_pwq() in __queue_work() and workqueue_congested().
numa_pwq_tbl[] is currently initialized to point to the same
pool_workqueue as first_pwq() so this patch doesn't make any behavior
changes.
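
The shape of the table can be pictured with a small userspace sketch; this is an
illustration only, not kernel code.  NR_NODES, the stub structs, and pwq_by_node()
are stand-ins, and plain pointers replace the RCU-protected pointers
(rcu_assign_pointer()/rcu_dereference_raw()) that the patch actually uses:

/*
 * Illustrative userspace sketch -- stand-ins only.  NR_NODES plays the
 * role of wq_numa_tbl_len and the structs below are stubs.
 */
#include <stdio.h>
#include <stdlib.h>

#define NR_NODES 4				/* stand-in for wq_numa_tbl_len */

struct pool_workqueue { int id; };		/* stub */

struct workqueue_struct {
	const char *name;			/* ordinary members first ... */
	struct pool_workqueue *numa_pwq_tbl[];	/* ... flexible array last */
};

/* same idea as unbound_pwq_by_node(): index the table by node ID */
static struct pool_workqueue *pwq_by_node(struct workqueue_struct *wq, int node)
{
	return wq->numa_pwq_tbl[node];
}

int main(void)
{
	/* allocation mirrors __alloc_workqueue_key(): base struct + per-node table */
	size_t tbl_size = NR_NODES * sizeof(struct pool_workqueue *);
	struct workqueue_struct *wq = calloc(1, sizeof(*wq) + tbl_size);
	struct pool_workqueue pwq = { .id = 0 };
	int node;

	if (!wq)
		return 1;

	/* as in this patch: every node initially points at the same pwq */
	for (node = 0; node < NR_NODES; node++)
		wq->numa_pwq_tbl[node] = &pwq;

	printf("node 2 -> pwq %d\n", pwq_by_node(wq, 2)->id);
	free(wq);
	return 0;
}

Queueing then reduces to an array index by cpu_to_node(cpu), which is what the
diff below switches __queue_work() and workqueue_congested() to.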
v2: Use rcu_dereference_raw() in unbound_pwq_by_node() as the function
    may be called only with wq->mutex held.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
			
			

commit df2d5ae499
parent 2728fd2f09

1 changed file with 37 additions and 11 deletions

kernel/workqueue.c

@@ -257,6 +257,7 @@ struct workqueue_struct {
 	/* hot fields used during command issue, aligned to cacheline */
 	unsigned int		flags ____cacheline_aligned; /* WQ: WQ_* flags */
 	struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
+	struct pool_workqueue __rcu *numa_pwq_tbl[]; /* FR: unbound pwqs indexed by node */
 };
 
 static struct kmem_cache *pwq_cache;
@@ -525,6 +526,22 @@ static struct pool_workqueue *first_pwq(struct workqueue_struct *wq)
 				      pwqs_node);
 }
 
+/**
+ * unbound_pwq_by_node - return the unbound pool_workqueue for the given node
+ * @wq: the target workqueue
+ * @node: the node ID
+ *
+ * This must be called either with pwq_lock held or sched RCU read locked.
+ * If the pwq needs to be used beyond the locking in effect, the caller is
+ * responsible for guaranteeing that the pwq stays online.
+ */
+static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
+						  int node)
+{
+	assert_rcu_or_wq_mutex(wq);
+	return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
+}
+
 static unsigned int work_color_to_flags(int color)
 {
 	return color << WORK_STRUCT_COLOR_SHIFT;
@@ -1278,14 +1295,14 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 	    WARN_ON_ONCE(!is_chained_work(wq)))
 		return;
 retry:
+	if (req_cpu == WORK_CPU_UNBOUND)
+		cpu = raw_smp_processor_id();
+
 	/* pwq which will be used unless @work is executing elsewhere */
-	if (!(wq->flags & WQ_UNBOUND)) {
-		if (cpu == WORK_CPU_UNBOUND)
-			cpu = raw_smp_processor_id();
+	if (!(wq->flags & WQ_UNBOUND))
 		pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
-	} else {
-		pwq = first_pwq(wq);
-	}
+	else
+		pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
 
 	/*
 	 * If @work was previously on a different pool, it might still be
@@ -1315,8 +1332,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 	 * pwq is determined and locked.  For unbound pools, we could have
 	 * raced with pwq release and it could already be dead.  If its
 	 * refcnt is zero, repeat pwq selection.  Note that pwqs never die
-	 * without another pwq replacing it as the first pwq or while a
-	 * work item is executing on it, so the retying is guaranteed to
+	 * without another pwq replacing it in the numa_pwq_tbl or while
+	 * work items are executing on it, so the retrying is guaranteed to
 	 * make forward-progress.
	 */
 	if (unlikely(!pwq->refcnt)) {
@@ -3614,6 +3631,8 @@ static void init_and_link_pwq(struct pool_workqueue *pwq,
 			      struct worker_pool *pool,
 			      struct pool_workqueue **p_last_pwq)
 {
+	int node;
+
 	BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
 
 	pwq->pool = pool;
@@ -3640,8 +3659,11 @@ static void init_and_link_pwq(struct pool_workqueue *pwq,
 	/* link in @pwq */
 	list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
 
-	if (wq->flags & WQ_UNBOUND)
+	if (wq->flags & WQ_UNBOUND) {
 		copy_workqueue_attrs(wq->unbound_attrs, pool->attrs);
+		for_each_node(node)
+			rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
+	}
 
 	mutex_unlock(&wq->mutex);
 }
@@ -3761,12 +3783,16 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 					       struct lock_class_key *key,
 					       const char *lock_name, ...)
 {
+	size_t tbl_size = 0;
 	va_list args;
 	struct workqueue_struct *wq;
 	struct pool_workqueue *pwq;
 
 	/* allocate wq and format name */
-	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
+	if (flags & WQ_UNBOUND)
+		tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]);
+
+	wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
 	if (!wq)
 		return NULL;
 
@@ -3994,7 +4020,7 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
 	if (!(wq->flags & WQ_UNBOUND))
 		pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
 	else
-		pwq = first_pwq(wq);
+		pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
 
 	ret = !list_empty(&pwq->delayed_works);
 	rcu_read_unlock_sched();